Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
4 : * Written by Alex Tomas <alex@clusterfs.com>
5 : *
6 : * Architecture independence:
7 : * Copyright (c) 2005, Bull S.A.
8 : * Written by Pierre Peiffer <pierre.peiffer@bull.net>
9 : */
10 :
11 : /*
12 : * Extents support for EXT4
13 : *
14 : * TODO:
15 : * - ext4*_error() should be used in some situations
16 : * - analyze all BUG()/BUG_ON(), use -EIO where appropriate
17 : * - smart tree reduction
18 : */
19 :
20 : #include <linux/fs.h>
21 : #include <linux/time.h>
22 : #include <linux/jbd2.h>
23 : #include <linux/highuid.h>
24 : #include <linux/pagemap.h>
25 : #include <linux/quotaops.h>
26 : #include <linux/string.h>
27 : #include <linux/slab.h>
28 : #include <linux/uaccess.h>
29 : #include <linux/fiemap.h>
30 : #include <linux/iomap.h>
31 : #include <linux/sched/mm.h>
32 : #include "ext4_jbd2.h"
33 : #include "ext4_extents.h"
34 : #include "xattr.h"
35 :
36 : #include <trace/events/ext4.h>
37 :
38 : /*
39 : * used by extent splitting.
40 : */
41 : #define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \
42 : due to ENOSPC */
43 : #define EXT4_EXT_MARK_UNWRIT1 0x2 /* mark first half unwritten */
44 : #define EXT4_EXT_MARK_UNWRIT2 0x4 /* mark second half unwritten */
45 :
46 : #define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */
47 : #define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */
48 :
49 10991192 : static __le32 ext4_extent_block_csum(struct inode *inode,
50 : struct ext4_extent_header *eh)
51 : {
52 10991192 : struct ext4_inode_info *ei = EXT4_I(inode);
53 10991192 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
54 10991192 : __u32 csum;
55 :
56 10991192 : csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)eh,
57 10991192 : EXT4_EXTENT_TAIL_OFFSET(eh));
58 10991297 : return cpu_to_le32(csum);
59 : }
60 :
61 8420 : static int ext4_extent_block_csum_verify(struct inode *inode,
62 : struct ext4_extent_header *eh)
63 : {
64 8420 : struct ext4_extent_tail *et;
65 :
66 8420 : if (!ext4_has_metadata_csum(inode->i_sb))
67 : return 1;
68 :
69 8420 : et = find_ext4_extent_tail(eh);
70 8420 : if (et->et_checksum != ext4_extent_block_csum(inode, eh))
71 0 : return 0;
72 : return 1;
73 : }
74 :
75 10983221 : static void ext4_extent_block_csum_set(struct inode *inode,
76 : struct ext4_extent_header *eh)
77 : {
78 10983221 : struct ext4_extent_tail *et;
79 :
80 10983221 : if (!ext4_has_metadata_csum(inode->i_sb))
81 : return;
82 :
83 10982742 : et = find_ext4_extent_tail(eh);
84 10982742 : et->et_checksum = ext4_extent_block_csum(inode, eh);
85 : }
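
A note on the layout these three helpers share: the checksum is a crc32c keyed by the per-inode seed (ei->i_csum_seed) and covers the extent block from the start of the header up to, but not including, a four-byte ext4_extent_tail. Below is a minimal userspace model, not kernel code; it assumes the tail sits at the very end of the block, whereas the kernel derives its offset from eh_max via EXT4_EXTENT_TAIL_OFFSET(), and the plain software CRC here may differ from the kernel's crc32c conditioning in detail.

#include <stdint.h>
#include <stddef.h>

/* Bitwise crc32c (Castagnoli polynomial), seeded; conditioning assumed. */
static uint32_t crc32c_seeded(uint32_t seed, const void *buf, size_t len)
{
        const uint8_t *p = buf;
        uint32_t crc = seed;

        while (len--) {
                crc ^= *p++;
                for (int k = 0; k < 8; k++)
                        crc = (crc >> 1) ^ (0x82f63b78 & -(crc & 1));
        }
        return crc;
}

struct extent_tail {
        uint32_t et_checksum;   /* stored little-endian on disk */
};

/* Model of ext4_extent_block_csum(): CRC from the header up to the tail. */
static uint32_t ext_block_csum(uint32_t inode_seed, const uint8_t *block,
                               size_t blocksize)
{
        return crc32c_seeded(inode_seed, block,
                             blocksize - sizeof(struct extent_tail));
}

/* Model of ext4_extent_block_csum_set(): stamp the tail in place. */
static void ext_block_csum_set(uint32_t inode_seed, uint8_t *block,
                               size_t blocksize)
{
        struct extent_tail *et =
                (struct extent_tail *)(block + blocksize - sizeof(*et));
        et->et_checksum = ext_block_csum(inode_seed, block, blocksize);
}
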
86 :
87 : static int ext4_split_extent_at(handle_t *handle,
88 : struct inode *inode,
89 : struct ext4_ext_path **ppath,
90 : ext4_lblk_t split,
91 : int split_flag,
92 : int flags);
93 :
94 68 : static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
95 : {
96 : /*
97 : * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
98 : * moment, get_block can be called only for blocks inside i_size since
99 : * the page cache has already been dropped and writes are blocked by
100 : * i_rwsem. So we can safely drop the i_data_sem here.
101 : */
102 68 : BUG_ON(EXT4_JOURNAL(inode) == NULL);
103 68 : ext4_discard_preallocations(inode, 0);
104 68 : up_write(&EXT4_I(inode)->i_data_sem);
105 68 : *dropped = 1;
106 68 : return 0;
107 : }
108 :
109 24470731 : static void ext4_ext_drop_refs(struct ext4_ext_path *path)
110 : {
111 24470731 : int depth, i;
112 :
113 24470731 : if (!path)
114 : return;
115 19153343 : depth = path->p_depth;
116 56696559 : for (i = 0; i <= depth; i++, path++) {
117 37549303 : brelse(path->p_bh);
118 37543216 : path->p_bh = NULL;
119 : }
120 : }
121 :
122 1448298 : void ext4_free_ext_path(struct ext4_ext_path *path)
123 : {
124 1448298 : ext4_ext_drop_refs(path);
125 17119913 : kfree(path);
126 994 : }
127 :
128 : /*
129 : * Make sure 'handle' has at least 'check_cred' credits. If not, restart the
130 : * transaction with 'restart_cred' credits. The function drops i_data_sem
131 : * when restarting the transaction and re-acquires it once it is restarted.
132 : *
133 : * The function returns 0 on success, 1 if transaction had to be restarted,
134 : * and < 0 in case of fatal error.
135 : */
136 2673111 : int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode,
137 : int check_cred, int restart_cred,
138 : int revoke_cred)
139 : {
140 2673111 : int ret;
141 2673111 : int dropped = 0;
142 :
143 2673179 : ret = ext4_journal_ensure_credits_fn(handle, check_cred, restart_cred,
144 : revoke_cred, ext4_ext_trunc_restart_fn(inode, &dropped));
145 2673085 : if (dropped)
146 68 : down_write(&EXT4_I(inode)->i_data_sem);
147 2673085 : return ret;
148 : }
149 :
150 : /*
151 : * could return:
152 : * - EROFS
153 : * - ENOMEM
154 : */
155 14020935 : static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
156 : struct ext4_ext_path *path)
157 : {
158 14020935 : int err = 0;
159 :
160 14020935 : if (path->p_bh) {
161 : /* path points to block */
162 10955670 : BUFFER_TRACE(path->p_bh, "get_write_access");
163 10955670 : err = ext4_journal_get_write_access(handle, inode->i_sb,
164 : path->p_bh, EXT4_JTR_NONE);
165 : /*
166 : * The extent buffer's verified bit will be set again in
167 : * __ext4_ext_dirty(). We could leave an inconsistent
168 : * buffer if the extent updating procedure breaks off due
169 : * to some error, so force a re-check here.
170 : */
171 10955899 : if (!err)
172 10955898 : clear_buffer_verified(path->p_bh);
173 : }
174 : /* path points to leaf/index in inode body */
175 : /* we use in-core data, no need to protect them */
176 14021292 : return err;
177 : }
178 :
179 : /*
180 : * could return:
181 : * - EROFS
182 : * - ENOMEM
183 : * - EIO
184 : */
185 14020738 : static int __ext4_ext_dirty(const char *where, unsigned int line,
186 : handle_t *handle, struct inode *inode,
187 : struct ext4_ext_path *path)
188 : {
189 14020738 : int err;
190 :
191 14020738 : WARN_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
192 14020738 : if (path->p_bh) {
193 10913502 : ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh));
194 : /* path points to block */
195 10913564 : err = __ext4_handle_dirty_metadata(where, line, handle,
196 : inode, path->p_bh);
197 : /* Extents updating done, re-set verified flag */
198 10913543 : if (!err)
199 10913541 : set_buffer_verified(path->p_bh);
200 : } else {
201 : /* path points to leaf/index in inode body */
202 3107236 : err = ext4_mark_inode_dirty(handle, inode);
203 : }
204 14022158 : return err;
205 : }
206 :
207 : #define ext4_ext_dirty(handle, inode, path) \
208 : __ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
209 :
210 3999935 : static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
211 : struct ext4_ext_path *path,
212 : ext4_lblk_t block)
213 : {
214 3999935 : if (path) {
215 3999935 : int depth = path->p_depth;
216 3999935 : struct ext4_extent *ex;
217 :
218 : /*
219 : * Try to predict block placement assuming that we are
220 : * filling in a file which will eventually be
221 : * non-sparse --- i.e., in the case of libbfd writing
222 : * an ELF object sections out-of-order but in a way
223 : * that eventually results in a contiguous object or
224 : * executable file, or some database extending a table
225 : * space file. However, this is actually somewhat
226 : * non-ideal if we are writing a sparse file such as
227 : * qemu or KVM writing a raw image file that is going
228 : * to stay fairly sparse, since it will end up
229 : * fragmenting the file system's free space. Maybe we
230 : * should have some hueristics or some way to allow
231 : * should have some heuristics or some way to allow
232 : * userspace to pass a hint to the file system,
233 : * common.
234 : */
235 3999935 : ex = path[depth].p_ext;
236 3999935 : if (ex) {
237 3138486 : ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
238 3138486 : ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);
239 :
240 3138486 : if (block > ext_block)
241 3060493 : return ext_pblk + (block - ext_block);
242 : else
243 77993 : return ext_pblk - (ext_block - block);
244 : }
245 :
246 : /* it looks like the index is empty;
247 : * try to find the starting block from the index itself */
248 861449 : if (path[depth].p_bh)
249 0 : return path[depth].p_bh->b_blocknr;
250 : }
251 :
252 : /* OK. use inode's group */
253 861449 : return ext4_inode_to_goal_block(inode);
254 : }
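
The prediction above is plain linear extrapolation from the nearest extent in the path. A toy version, with invented numbers, makes the two branches concrete:

#include <stdint.h>

/* Toy model of the extrapolation branch of ext4_ext_find_goal(). */
static uint64_t find_goal(uint64_t ext_pblk, uint32_t ext_lblk,
                          uint32_t block)
{
        if (block > ext_lblk)
                return ext_pblk + (block - ext_lblk);   /* project right */
        return ext_pblk - (ext_lblk - block);           /* project left  */
}

/* e.g. find_goal(5000, 100, 130) == 5030, find_goal(5000, 100, 90) == 4990 */
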
255 :
256 : /*
257 : * Allocation for a meta data block
258 : */
259 : static ext4_fsblk_t
260 5978 : ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
261 : struct ext4_ext_path *path,
262 : struct ext4_extent *ex, int *err, unsigned int flags)
263 : {
264 5978 : ext4_fsblk_t goal, newblock;
265 :
266 5978 : goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
267 5978 : newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
268 : NULL, err);
269 5978 : return newblock;
270 : }
271 :
272 : static inline int ext4_ext_space_block(struct inode *inode, int check)
273 : {
274 78002 : int size;
275 :
276 78002 : size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
277 78002 : / sizeof(struct ext4_extent);
278 : #ifdef AGGRESSIVE_TEST
279 : if (!check && size > 6)
280 : size = 6;
281 : #endif
282 78002 : return size;
283 : }
284 :
285 : static inline int ext4_ext_space_block_idx(struct inode *inode, int check)
286 : {
287 81 : int size;
288 :
289 81 : size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
290 81 : / sizeof(struct ext4_extent_idx);
291 : #ifdef AGGRESSIVE_TEST
292 : if (!check && size > 5)
293 : size = 5;
294 : #endif
295 81 : return size;
296 : }
297 :
298 : static inline int ext4_ext_space_root(struct inode *inode, int check)
299 : {
300 : int size;
301 :
302 : size = sizeof(EXT4_I(inode)->i_data);
303 : size -= sizeof(struct ext4_extent_header);
304 : size /= sizeof(struct ext4_extent);
305 : #ifdef AGGRESSIVE_TEST
306 : if (!check && size > 3)
307 : size = 3;
308 : #endif
309 : return size;
310 : }
311 :
312 : static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
313 : {
314 : int size;
315 :
316 : size = sizeof(EXT4_I(inode)->i_data);
317 : size -= sizeof(struct ext4_extent_header);
318 : size /= sizeof(struct ext4_extent_idx);
319 : #ifdef AGGRESSIVE_TEST
320 : if (!check && size > 4)
321 : size = 4;
322 : #endif
323 : return size;
324 : }
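
Concrete capacities for these four helpers, assuming a 4 KiB block size: the header, extent, and index records are 12 bytes each, and the in-inode root (i_data) is 60 bytes, so an on-disk tree block holds (4096 - 12) / 12 = 340 entries while the root holds (60 - 12) / 12 = 4. A standalone check of the arithmetic:

#include <assert.h>
#include <stdio.h>

int main(void)
{
        const int hdr = 12;       /* sizeof(struct ext4_extent_header)  */
        const int rec = 12;       /* extent and index records are equal */
        const int block = 4096;   /* assumed filesystem block size      */
        const int root = 60;      /* sizeof(EXT4_I(inode)->i_data)      */

        assert((block - hdr) / rec == 340);  /* per on-disk tree block */
        assert((root - hdr) / rec == 4);     /* per in-inode root      */
        printf("block node: %d entries, root: %d entries\n",
               (block - hdr) / rec, (root - hdr) / rec);
        return 0;
}
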
325 :
326 : static inline int
327 1540320 : ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
328 : struct ext4_ext_path **ppath, ext4_lblk_t lblk,
329 : int nofail)
330 : {
331 1540320 : struct ext4_ext_path *path = *ppath;
332 1540320 : int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);
333 1540320 : int flags = EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO;
334 :
335 1540320 : if (nofail)
336 164397 : flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL | EXT4_EX_NOFAIL;
337 :
338 1704672 : return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ?
339 : EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0,
340 : flags);
341 : }
342 :
343 : static int
344 936082 : ext4_ext_max_entries(struct inode *inode, int depth)
345 : {
346 936082 : int max;
347 :
348 936082 : if (depth == ext_depth(inode)) {
349 : if (depth == 0)
350 : max = ext4_ext_space_root(inode, 1);
351 : else
352 : max = ext4_ext_space_root_idx(inode, 1);
353 : } else {
354 8422 : if (depth == 0)
355 8399 : max = ext4_ext_space_block(inode, 1);
356 : else
357 23 : max = ext4_ext_space_block_idx(inode, 1);
358 : }
359 :
360 936082 : return max;
361 : }
362 :
363 1515273 : static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
364 : {
365 1515273 : ext4_fsblk_t block = ext4_ext_pblock(ext);
366 1515273 : int len = ext4_ext_get_actual_len(ext);
367 1515273 : ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
368 :
369 : /*
370 : * We allow neither:
371 : * - zero length
372 : * - overflow/wrap-around
373 : */
374 1515273 : if (lblock + len <= lblock)
375 : return 0;
376 1515238 : return ext4_inode_block_valid(inode, block, len);
377 : }
378 :
379 172818 : static int ext4_valid_extent_idx(struct inode *inode,
380 : struct ext4_extent_idx *ext_idx)
381 : {
382 172818 : ext4_fsblk_t block = ext4_idx_pblock(ext_idx);
383 :
384 172818 : return ext4_inode_block_valid(inode, block, 1);
385 : }
386 :
387 936118 : static int ext4_valid_extent_entries(struct inode *inode,
388 : struct ext4_extent_header *eh,
389 : ext4_lblk_t lblk, ext4_fsblk_t *pblk,
390 : int depth)
391 : {
392 936118 : unsigned short entries;
393 936118 : ext4_lblk_t lblock = 0;
394 936118 : ext4_lblk_t cur = 0;
395 :
396 936118 : if (eh->eh_entries == 0)
397 : return 1;
398 :
399 651769 : entries = le16_to_cpu(eh->eh_entries);
400 :
401 651769 : if (depth == 0) {
402 : /* leaf entries */
403 481181 : struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
404 :
405 : /*
406 : * The logical block in the first entry should equal
407 : * the number in the index block.
408 : */
409 481181 : if (depth != ext_depth(inode) &&
410 8260 : lblk != le32_to_cpu(ext->ee_block))
411 : return 0;
412 1996544 : while (entries) {
413 1515286 : if (!ext4_valid_extent(inode, ext))
414 : return 0;
415 :
416 : /* Check for overlapping extents */
417 1515365 : lblock = le32_to_cpu(ext->ee_block);
418 1515365 : if (lblock < cur) {
419 0 : *pblk = ext4_ext_pblock(ext);
420 0 : return 0;
421 : }
422 1515365 : cur = lblock + ext4_ext_get_actual_len(ext);
423 1515365 : ext++;
424 1515365 : entries--;
425 : }
426 : } else {
427 170588 : struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
428 :
429 : /*
430 : * The logical block in the first entry should equal
431 : * the number in the parent index block.
432 : */
433 170588 : if (depth != ext_depth(inode) &&
434 23 : lblk != le32_to_cpu(ext_idx->ei_block))
435 : return 0;
436 343408 : while (entries) {
437 172818 : if (!ext4_valid_extent_idx(inode, ext_idx))
438 : return 0;
439 :
440 : /* Check for overlapping index extents */
441 172820 : lblock = le32_to_cpu(ext_idx->ei_block);
442 172820 : if (lblock < cur) {
443 0 : *pblk = ext4_idx_pblock(ext_idx);
444 0 : return 0;
445 : }
446 172820 : ext_idx++;
447 172820 : entries--;
448 172820 : cur = lblock + 1;
449 : }
450 : }
451 : return 1;
452 : }
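
The walk above enforces, per node, that every entry is individually valid and that entries are sorted and non-overlapping: cur tracks the end of the previous entry and each start must land at or beyond it (index entries are treated as length 1). A self-contained model of just the ordering check, with invented array names:

#include <stdint.h>
#include <stdbool.h>

/*
 * Mirrors the 'cur' tracking in ext4_valid_extent_entries() for a leaf:
 * starts[i]/lens[i] describe extent i, already decoded from disk order.
 */
static bool entries_sorted_nonoverlapping(const uint32_t *starts,
                                          const uint16_t *lens, int nr)
{
        uint32_t cur = 0;

        for (int i = 0; i < nr; i++) {
                if (starts[i] < cur)            /* overlap or misorder */
                        return false;
                cur = starts[i] + lens[i];
        }
        return true;
}
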
453 :
454 935996 : static int __ext4_ext_check(const char *function, unsigned int line,
455 : struct inode *inode, struct ext4_extent_header *eh,
456 : int depth, ext4_fsblk_t pblk, ext4_lblk_t lblk)
457 : {
458 935996 : const char *error_msg;
459 935996 : int max = 0, err = -EFSCORRUPTED;
460 :
461 935996 : if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
462 0 : error_msg = "invalid magic";
463 0 : goto corrupted;
464 : }
465 935996 : if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) {
466 0 : error_msg = "unexpected eh_depth";
467 0 : goto corrupted;
468 : }
469 935996 : if (unlikely(eh->eh_max == 0)) {
470 0 : error_msg = "invalid eh_max";
471 0 : goto corrupted;
472 : }
473 935996 : max = ext4_ext_max_entries(inode, depth);
474 935996 : if (unlikely(le16_to_cpu(eh->eh_max) > max)) {
475 0 : error_msg = "too large eh_max";
476 0 : goto corrupted;
477 : }
478 935996 : if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) {
479 0 : error_msg = "invalid eh_entries";
480 0 : goto corrupted;
481 : }
482 935996 : if (unlikely((eh->eh_entries == 0) && (depth > 0))) {
483 0 : error_msg = "eh_entries is 0 but eh_depth is > 0";
484 0 : goto corrupted;
485 : }
486 935996 : if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) {
487 2 : error_msg = "invalid extent entries";
488 2 : goto corrupted;
489 : }
490 936131 : if (unlikely(depth > 32)) {
491 0 : error_msg = "too large eh_depth";
492 0 : goto corrupted;
493 : }
494 : /* Verify checksum on non-root extent tree nodes */
495 944551 : if (ext_depth(inode) != depth &&
496 8420 : !ext4_extent_block_csum_verify(inode, eh)) {
497 0 : error_msg = "extent tree corrupted";
498 0 : err = -EFSBADCRC;
499 0 : goto corrupted;
500 : }
501 : return 0;
502 :
503 2 : corrupted:
504 2 : ext4_error_inode_err(inode, function, line, 0, -err,
505 : "pblk %llu bad header/extent: %s - magic %x, "
506 : "entries %u, max %u(%u), depth %u(%u)",
507 : (unsigned long long) pblk, error_msg,
508 : le16_to_cpu(eh->eh_magic),
509 : le16_to_cpu(eh->eh_entries),
510 : le16_to_cpu(eh->eh_max),
511 : max, le16_to_cpu(eh->eh_depth), depth);
512 2 : return err;
513 : }
514 :
515 : #define ext4_ext_check(inode, eh, depth, pblk) \
516 : __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk), 0)
517 :
518 201219 : int ext4_ext_check_inode(struct inode *inode)
519 : {
520 201219 : return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0);
521 : }
522 :
523 3883428 : static void ext4_cache_extents(struct inode *inode,
524 : struct ext4_extent_header *eh)
525 : {
526 3883428 : struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
527 3883428 : ext4_lblk_t prev = 0;
528 3883428 : int i;
529 :
530 6890944 : for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
531 3003311 : unsigned int status = EXTENT_STATUS_WRITTEN;
532 3003311 : ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
533 3003311 : int len = ext4_ext_get_actual_len(ex);
534 :
535 3003311 : if (prev && (prev != lblk))
536 1068306 : ext4_es_cache_extent(inode, prev, lblk - prev, ~0,
537 : EXTENT_STATUS_HOLE);
538 :
539 3003316 : if (ext4_ext_is_unwritten(ex))
540 1424236 : status = EXTENT_STATUS_UNWRITTEN;
541 3003316 : ext4_es_cache_extent(inode, lblk, len,
542 : ext4_ext_pblock(ex), status);
543 3007516 : prev = lblk + len;
544 : }
545 3887633 : }
546 :
547 : static struct buffer_head *
548 18501198 : __read_extent_tree_block(const char *function, unsigned int line,
549 : struct inode *inode, struct ext4_extent_idx *idx,
550 : int depth, int flags)
551 : {
552 18501198 : struct buffer_head *bh;
553 18501198 : int err;
554 18501198 : gfp_t gfp_flags = __GFP_MOVABLE | GFP_NOFS;
555 18501198 : ext4_fsblk_t pblk;
556 :
557 18501198 : if (flags & EXT4_EX_NOFAIL)
558 390613 : gfp_flags |= __GFP_NOFAIL;
559 :
560 18501198 : pblk = ext4_idx_pblock(idx);
561 18501198 : bh = sb_getblk_gfp(inode->i_sb, pblk, gfp_flags);
562 18504905 : if (unlikely(!bh))
563 : return ERR_PTR(-ENOMEM);
564 :
565 18504905 : if (!bh_uptodate_or_lock(bh)) {
566 2572 : trace_ext4_ext_load_extent(inode, pblk, _RET_IP_);
567 2572 : err = ext4_read_bh(bh, 0, NULL);
568 2572 : if (err < 0)
569 0 : goto errout;
570 : }
571 37005354 : if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE))
572 : return bh;
573 8375 : err = __ext4_ext_check(function, line, inode, ext_block_hdr(bh),
574 8375 : depth, pblk, le32_to_cpu(idx->ei_block));
575 8422 : if (err)
576 2 : goto errout;
577 8420 : set_buffer_verified(bh);
578 : /*
579 : * If this is a leaf block, cache all of its entries
580 : */
581 8420 : if (!(flags & EXT4_EX_NOCACHE) && depth == 0) {
582 4931 : struct ext4_extent_header *eh = ext_block_hdr(bh);
583 4931 : ext4_cache_extents(inode, eh);
584 : }
585 : return bh;
586 2 : errout:
587 2 : put_bh(bh);
588 2 : return ERR_PTR(err);
589 :
590 : }
591 :
592 : #define read_extent_tree_block(inode, idx, depth, flags) \
593 : __read_extent_tree_block(__func__, __LINE__, (inode), (idx), \
594 : (depth), (flags))
595 :
596 : /*
597 : * This function is called to cache a file's extent information in the
598 : * extent status tree
599 : */
600 0 : int ext4_ext_precache(struct inode *inode)
601 : {
602 0 : struct ext4_inode_info *ei = EXT4_I(inode);
603 0 : struct ext4_ext_path *path = NULL;
604 0 : struct buffer_head *bh;
605 0 : int i = 0, depth, ret = 0;
606 :
607 0 : if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
608 : return 0; /* not an extent-mapped inode */
609 :
610 0 : down_read(&ei->i_data_sem);
611 0 : depth = ext_depth(inode);
612 :
613 : /* Don't cache anything if there are no external extent blocks */
614 0 : if (!depth) {
615 0 : up_read(&ei->i_data_sem);
616 0 : return ret;
617 : }
618 :
619 0 : path = kcalloc(depth + 1, sizeof(struct ext4_ext_path),
620 : GFP_NOFS);
621 0 : if (path == NULL) {
622 0 : up_read(&ei->i_data_sem);
623 0 : return -ENOMEM;
624 : }
625 :
626 0 : path[0].p_hdr = ext_inode_hdr(inode);
627 0 : ret = ext4_ext_check(inode, path[0].p_hdr, depth, 0);
628 0 : if (ret)
629 0 : goto out;
630 0 : path[0].p_idx = EXT_FIRST_INDEX(path[0].p_hdr);
631 0 : while (i >= 0) {
632 : /*
633 : * If this is a leaf block or we've reached the end of
634 : * the index block, go up
635 : */
636 0 : if ((i == depth) ||
637 0 : path[i].p_idx > EXT_LAST_INDEX(path[i].p_hdr)) {
638 0 : brelse(path[i].p_bh);
639 0 : path[i].p_bh = NULL;
640 0 : i--;
641 0 : continue;
642 : }
643 0 : bh = read_extent_tree_block(inode, path[i].p_idx++,
644 : depth - i - 1,
645 : EXT4_EX_FORCE_CACHE);
646 0 : if (IS_ERR(bh)) {
647 0 : ret = PTR_ERR(bh);
648 0 : break;
649 : }
650 0 : i++;
651 0 : path[i].p_bh = bh;
652 0 : path[i].p_hdr = ext_block_hdr(bh);
653 0 : path[i].p_idx = EXT_FIRST_INDEX(path[i].p_hdr);
654 : }
655 0 : ext4_set_inode_state(inode, EXT4_STATE_EXT_PRECACHED);
656 0 : out:
657 0 : up_read(&ei->i_data_sem);
658 0 : ext4_free_ext_path(path);
659 0 : return ret;
660 : }
661 :
662 : #ifdef EXT_DEBUG
663 : static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
664 : {
665 : int k, l = path->p_depth;
666 :
667 : ext_debug(inode, "path:");
668 : for (k = 0; k <= l; k++, path++) {
669 : if (path->p_idx) {
670 : ext_debug(inode, " %d->%llu",
671 : le32_to_cpu(path->p_idx->ei_block),
672 : ext4_idx_pblock(path->p_idx));
673 : } else if (path->p_ext) {
674 : ext_debug(inode, " %d:[%d]%d:%llu ",
675 : le32_to_cpu(path->p_ext->ee_block),
676 : ext4_ext_is_unwritten(path->p_ext),
677 : ext4_ext_get_actual_len(path->p_ext),
678 : ext4_ext_pblock(path->p_ext));
679 : } else
680 : ext_debug(inode, " []");
681 : }
682 : ext_debug(inode, "\n");
683 : }
684 :
685 : static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
686 : {
687 : int depth = ext_depth(inode);
688 : struct ext4_extent_header *eh;
689 : struct ext4_extent *ex;
690 : int i;
691 :
692 : if (!path)
693 : return;
694 :
695 : eh = path[depth].p_hdr;
696 : ex = EXT_FIRST_EXTENT(eh);
697 :
698 : ext_debug(inode, "Displaying leaf extents\n");
699 :
700 : for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
701 : ext_debug(inode, "%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
702 : ext4_ext_is_unwritten(ex),
703 : ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
704 : }
705 : ext_debug(inode, "\n");
706 : }
707 :
708 : static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
709 : ext4_fsblk_t newblock, int level)
710 : {
711 : int depth = ext_depth(inode);
712 : struct ext4_extent *ex;
713 :
714 : if (depth != level) {
715 : struct ext4_extent_idx *idx;
716 : idx = path[level].p_idx;
717 : while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) {
718 : ext_debug(inode, "%d: move %d:%llu in new index %llu\n",
719 : level, le32_to_cpu(idx->ei_block),
720 : ext4_idx_pblock(idx), newblock);
721 : idx++;
722 : }
723 :
724 : return;
725 : }
726 :
727 : ex = path[depth].p_ext;
728 : while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) {
729 : ext_debug(inode, "move %d:%llu:[%d]%d in new leaf %llu\n",
730 : le32_to_cpu(ex->ee_block),
731 : ext4_ext_pblock(ex),
732 : ext4_ext_is_unwritten(ex),
733 : ext4_ext_get_actual_len(ex),
734 : newblock);
735 : ex++;
736 : }
737 : }
738 :
739 : #else
740 : #define ext4_ext_show_path(inode, path)
741 : #define ext4_ext_show_leaf(inode, path)
742 : #define ext4_ext_show_move(inode, path, newblock, level)
743 : #endif
744 :
745 : /*
746 : * ext4_ext_binsearch_idx:
747 : * binary search for the closest index of the given block
748 : * the header must be checked before calling this
749 : */
750 : static void
751 18271071 : ext4_ext_binsearch_idx(struct inode *inode,
752 : struct ext4_ext_path *path, ext4_lblk_t block)
753 : {
754 18271071 : struct ext4_extent_header *eh = path->p_hdr;
755 18271071 : struct ext4_extent_idx *r, *l, *m;
756 :
757 :
758 18271071 : ext_debug(inode, "binsearch for %u(idx): ", block);
759 :
760 18271071 : l = EXT_FIRST_INDEX(eh) + 1;
761 18271071 : r = EXT_LAST_INDEX(eh);
762 46864096 : while (l <= r) {
763 28593025 : m = l + (r - l) / 2;
764 28593025 : ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
765 : le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block),
766 : r, le32_to_cpu(r->ei_block));
767 :
768 28593025 : if (block < le32_to_cpu(m->ei_block))
769 14368273 : r = m - 1;
770 : else
771 14224752 : l = m + 1;
772 : }
773 :
774 18271071 : path->p_idx = l - 1;
775 18271071 : ext_debug(inode, " -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block),
776 : ext4_idx_pblock(path->p_idx));
777 :
778 : #ifdef CHECK_BINSEARCH
779 : {
780 : struct ext4_extent_idx *chix, *ix;
781 : int k;
782 :
783 : chix = ix = EXT_FIRST_INDEX(eh);
784 : for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
785 : if (k != 0 && le32_to_cpu(ix->ei_block) <=
786 : le32_to_cpu(ix[-1].ei_block)) {
787 : printk(KERN_DEBUG "k=%d, ix=0x%p, "
788 : "first=0x%p\n", k,
789 : ix, EXT_FIRST_INDEX(eh));
790 : printk(KERN_DEBUG "%u <= %u\n",
791 : le32_to_cpu(ix->ei_block),
792 : le32_to_cpu(ix[-1].ei_block));
793 : }
794 : BUG_ON(k && le32_to_cpu(ix->ei_block)
795 : <= le32_to_cpu(ix[-1].ei_block));
796 : if (block < le32_to_cpu(ix->ei_block))
797 : break;
798 : chix = ix;
799 : }
800 : BUG_ON(chix != path->p_idx);
801 : }
802 : #endif
803 :
804 18271071 : }
805 :
806 : /*
807 : * ext4_ext_binsearch:
808 : * binary search for the closest extent of the given block
809 : * the header must be checked before calling this
810 : */
811 : static void
812 18422510 : ext4_ext_binsearch(struct inode *inode,
813 : struct ext4_ext_path *path, ext4_lblk_t block)
814 : {
815 18422510 : struct ext4_extent_header *eh = path->p_hdr;
816 18422510 : struct ext4_extent *r, *l, *m;
817 :
818 18422510 : if (eh->eh_entries == 0) {
819 : /*
820 : * this leaf is empty:
821 : * we get such a leaf in split/add case
822 : */
823 : return;
824 : }
825 :
826 15660968 : ext_debug(inode, "binsearch for %u: ", block);
827 :
828 15660968 : l = EXT_FIRST_EXTENT(eh) + 1;
829 15660968 : r = EXT_LAST_EXTENT(eh);
830 :
831 92981188 : while (l <= r) {
832 77320220 : m = l + (r - l) / 2;
833 77320220 : ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
834 : le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block),
835 : r, le32_to_cpu(r->ee_block));
836 :
837 77320220 : if (block < le32_to_cpu(m->ee_block))
838 32416179 : r = m - 1;
839 : else
840 44904041 : l = m + 1;
841 : }
842 :
843 15660968 : path->p_ext = l - 1;
844 15660968 : ext_debug(inode, " -> %d:%llu:[%d]%d ",
845 : le32_to_cpu(path->p_ext->ee_block),
846 : ext4_ext_pblock(path->p_ext),
847 : ext4_ext_is_unwritten(path->p_ext),
848 : ext4_ext_get_actual_len(path->p_ext));
849 :
850 : #ifdef CHECK_BINSEARCH
851 : {
852 : struct ext4_extent *chex, *ex;
853 : int k;
854 :
855 : chex = ex = EXT_FIRST_EXTENT(eh);
856 : for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
857 : BUG_ON(k && le32_to_cpu(ex->ee_block)
858 : <= le32_to_cpu(ex[-1].ee_block));
859 : if (block < le32_to_cpu(ex->ee_block))
860 : break;
861 : chex = ex;
862 : }
863 : BUG_ON(chex != path->p_ext);
864 : }
865 : #endif
866 :
867 : }
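
Both binsearch helpers share one invariant: the scan starts at the second entry, finds the leftmost entry whose start exceeds the target, and steps back one, so the result is the last entry whose start is <= block (the only entry that could cover it). A standalone model over a sorted array of starts (names invented):

#include <stdint.h>

/* Given nr >= 1 entry starts in ascending order, return the index of the
 * last entry with start[i] <= block; entry 0 is returned even when block
 * precedes all starts, matching the kernel's l/r/m loop. */
static int ext_binsearch(const uint32_t *start, int nr, uint32_t block)
{
        int l = 1, r = nr - 1;          /* entry 0 is never probed */

        while (l <= r) {
                int m = l + (r - l) / 2;
                if (block < start[m])
                        r = m - 1;
                else
                        l = m + 1;
        }
        return l - 1;
}
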
868 :
869 2599439 : void ext4_ext_tree_init(handle_t *handle, struct inode *inode)
870 : {
871 2599439 : struct ext4_extent_header *eh;
872 :
873 2599439 : eh = ext_inode_hdr(inode);
874 2599439 : eh->eh_depth = 0;
875 2599439 : eh->eh_entries = 0;
876 2599439 : eh->eh_magic = EXT4_EXT_MAGIC;
877 2599439 : eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
878 2599439 : eh->eh_generation = 0;
879 2599439 : ext4_mark_inode_dirty(handle, inode);
880 2607561 : }
881 :
882 : struct ext4_ext_path *
883 18425850 : ext4_find_extent(struct inode *inode, ext4_lblk_t block,
884 : struct ext4_ext_path **orig_path, int flags)
885 : {
886 18425850 : struct ext4_extent_header *eh;
887 18425850 : struct buffer_head *bh;
888 18425850 : struct ext4_ext_path *path = orig_path ? *orig_path : NULL;
889 18425850 : short int depth, i, ppos = 0;
890 18425850 : int ret;
891 18425850 : gfp_t gfp_flags = GFP_NOFS;
892 :
893 18425850 : if (flags & EXT4_EX_NOFAIL)
894 425351 : gfp_flags |= __GFP_NOFAIL;
895 :
896 18425850 : eh = ext_inode_hdr(inode);
897 18425850 : depth = ext_depth(inode);
898 18425850 : if (depth < 0 || depth > EXT4_MAX_EXTENT_DEPTH) {
899 0 : EXT4_ERROR_INODE(inode, "inode has invalid extent depth: %d",
900 : depth);
901 0 : ret = -EFSCORRUPTED;
902 0 : goto err;
903 : }
904 :
905 18425850 : if (path) {
906 1712252 : ext4_ext_drop_refs(path);
907 1712251 : if (depth > path[0].p_maxdepth) {
908 0 : kfree(path);
909 0 : *orig_path = path = NULL;
910 : }
911 : }
912 18425849 : if (!path) {
913 : /* account possible depth increase */
914 16707627 : path = kcalloc(depth + 2, sizeof(struct ext4_ext_path),
915 : gfp_flags);
916 16702404 : if (unlikely(!path))
917 : return ERR_PTR(-ENOMEM);
918 16702404 : path[0].p_maxdepth = depth + 1;
919 : }
920 18420626 : path[0].p_hdr = eh;
921 18420626 : path[0].p_bh = NULL;
922 :
923 18420626 : i = depth;
924 18420626 : if (!(flags & EXT4_EX_NOCACHE) && depth == 0)
925 3881387 : ext4_cache_extents(inode, eh);
926 : /* walk through the tree */
927 36695302 : while (i) {
928 18273064 : ext_debug(inode, "depth %d: num %d, max %d\n",
929 : ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
930 :
931 18273064 : ext4_ext_binsearch_idx(inode, path + ppos, block);
932 18271822 : path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
933 18271822 : path[ppos].p_depth = i;
934 18271822 : path[ppos].p_ext = NULL;
935 :
936 18271822 : bh = read_extent_tree_block(inode, path[ppos].p_idx, --i, flags);
937 18273185 : if (IS_ERR(bh)) {
938 1 : ret = PTR_ERR(bh);
939 1 : goto err;
940 : }
941 :
942 18273184 : eh = ext_block_hdr(bh);
943 18273184 : ppos++;
944 18273184 : path[ppos].p_bh = bh;
945 18273184 : path[ppos].p_hdr = eh;
946 : }
947 :
948 18422238 : path[ppos].p_depth = i;
949 18422238 : path[ppos].p_ext = NULL;
950 18422238 : path[ppos].p_idx = NULL;
951 :
952 : /* find extent */
953 18422238 : ext4_ext_binsearch(inode, path + ppos, block);
954 : /* if not an empty leaf */
955 18422379 : if (path[ppos].p_ext)
956 15660881 : path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
957 :
958 : ext4_ext_show_path(inode, path);
959 :
960 : return path;
961 :
962 1 : err:
963 1 : ext4_free_ext_path(path);
964 1 : if (orig_path)
965 0 : *orig_path = NULL;
966 1 : return ERR_PTR(ret);
967 : }
968 :
969 : /*
970 : * ext4_ext_insert_index:
971 : * insert new index [@logical;@ptr] into the block at @curp;
972 : * check where to insert: before @curp or after @curp
973 : */
974 5969 : static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
975 : struct ext4_ext_path *curp,
976 : int logical, ext4_fsblk_t ptr)
977 : {
978 5969 : struct ext4_extent_idx *ix;
979 5969 : int len, err;
980 :
981 5969 : err = ext4_ext_get_access(handle, inode, curp);
982 5969 : if (err)
983 : return err;
984 :
985 5969 : if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
986 0 : EXT4_ERROR_INODE(inode,
987 : "logical %d == ei_block %d!",
988 : logical, le32_to_cpu(curp->p_idx->ei_block));
989 0 : return -EFSCORRUPTED;
990 : }
991 :
992 5969 : if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
993 : >= le16_to_cpu(curp->p_hdr->eh_max))) {
994 0 : EXT4_ERROR_INODE(inode,
995 : "eh_entries %d >= eh_max %d!",
996 : le16_to_cpu(curp->p_hdr->eh_entries),
997 : le16_to_cpu(curp->p_hdr->eh_max));
998 0 : return -EFSCORRUPTED;
999 : }
1000 :
1001 5969 : if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
1002 : /* insert after */
1003 5969 : ext_debug(inode, "insert new index %d after: %llu\n",
1004 : logical, ptr);
1005 5969 : ix = curp->p_idx + 1;
1006 : } else {
1007 : /* insert before */
1008 : ext_debug(inode, "insert new index %d before: %llu\n",
1009 : logical, ptr);
1010 : ix = curp->p_idx;
1011 : }
1012 :
1013 5969 : len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
1014 5969 : BUG_ON(len < 0);
1015 5969 : if (len > 0) {
1016 4329 : ext_debug(inode, "insert new index %d: "
1017 : "move %d indices from 0x%p to 0x%p\n",
1018 : logical, len, ix, ix + 1);
1019 8658 : memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
1020 : }
1021 :
1022 5969 : if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
1023 0 : EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
1024 0 : return -EFSCORRUPTED;
1025 : }
1026 :
1027 5969 : ix->ei_block = cpu_to_le32(logical);
1028 5969 : ext4_idx_store_pblock(ix, ptr);
1029 5969 : le16_add_cpu(&curp->p_hdr->eh_entries, 1);
1030 :
1031 5969 : if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
1032 0 : EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
1033 0 : return -EFSCORRUPTED;
1034 : }
1035 :
1036 5969 : err = ext4_ext_dirty(handle, inode, curp);
1037 5969 : ext4_std_error(inode->i_sb, err);
1038 :
1039 : return err;
1040 : }
1041 :
1042 : /*
1043 : * ext4_ext_split:
1044 : * inserts new subtree into the path, using free index entry
1045 : * at depth @at:
1046 : * - allocates all needed blocks (new leaf and all intermediate index blocks)
1047 : * - makes decision where to split
1048 : * - moves remaining extents and index entries (right to the split point)
1049 : * into the newly allocated blocks
1050 : * - initializes subtree
1051 : */
1052 5969 : static int ext4_ext_split(handle_t *handle, struct inode *inode,
1053 : unsigned int flags,
1054 : struct ext4_ext_path *path,
1055 : struct ext4_extent *newext, int at)
1056 : {
1057 5969 : struct buffer_head *bh = NULL;
1058 5969 : int depth = ext_depth(inode);
1059 5969 : struct ext4_extent_header *neh;
1060 5969 : struct ext4_extent_idx *fidx;
1061 5969 : int i = at, k, m, a;
1062 5969 : ext4_fsblk_t newblock, oldblock;
1063 5969 : __le32 border;
1064 5969 : ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
1065 5969 : gfp_t gfp_flags = GFP_NOFS;
1066 5969 : int err = 0;
1067 5969 : size_t ext_size = 0;
1068 :
1069 5969 : if (flags & EXT4_EX_NOFAIL)
1070 0 : gfp_flags |= __GFP_NOFAIL;
1071 :
1072 : /* make decision: where to split? */
1073 : /* FIXME: now decision is simplest: at current extent */
1074 :
1075 : /* if the current leaf will be split, then we should use
1076 : * the border from the split point */
1077 5969 : if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
1078 0 : EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
1079 0 : return -EFSCORRUPTED;
1080 : }
1081 5969 : if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
1082 5828 : border = path[depth].p_ext[1].ee_block;
1083 5828 : ext_debug(inode, "leaf will be split."
1084 : " next leaf starts at %d\n",
1085 : le32_to_cpu(border));
1086 : } else {
1087 141 : border = newext->ee_block;
1088 141 : ext_debug(inode, "leaf will be added."
1089 : " next leaf starts at %d\n",
1090 : le32_to_cpu(border));
1091 : }
1092 :
1093 : /*
1094 : * If an error occurs, we stop processing and mark
1095 : * the filesystem read-only. The index won't be
1096 : * inserted and the tree will remain in a consistent
1097 : * state. The next mount will repair the buffers too.
1098 : */
1099 :
1100 : /*
1101 : * Get an array to track all allocated blocks.
1102 : * We need it to handle errors and free the
1103 : * blocks on failure.
1104 : */
1105 5969 : ablocks = kcalloc(depth, sizeof(ext4_fsblk_t), gfp_flags);
1106 5969 : if (!ablocks)
1107 : return -ENOMEM;
1108 :
1109 : /* allocate all needed blocks */
1110 : ext_debug(inode, "allocate %d blocks for indexes/leaf\n", depth - at);
1111 11947 : for (a = 0; a < depth - at; a++) {
1112 5978 : newblock = ext4_ext_new_meta_block(handle, inode, path,
1113 : newext, &err, flags);
1114 5978 : if (newblock == 0)
1115 0 : goto cleanup;
1116 5978 : ablocks[a] = newblock;
1117 : }
1118 :
1119 : /* initialize new leaf */
1120 5969 : newblock = ablocks[--a];
1121 5969 : if (unlikely(newblock == 0)) {
1122 0 : EXT4_ERROR_INODE(inode, "newblock == 0!");
1123 0 : err = -EFSCORRUPTED;
1124 0 : goto cleanup;
1125 : }
1126 5969 : bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
1127 5969 : if (unlikely(!bh)) {
1128 0 : err = -ENOMEM;
1129 0 : goto cleanup;
1130 : }
1131 5969 : lock_buffer(bh);
1132 :
1133 5969 : err = ext4_journal_get_create_access(handle, inode->i_sb, bh,
1134 : EXT4_JTR_NONE);
1135 5969 : if (err)
1136 0 : goto cleanup;
1137 :
1138 5969 : neh = ext_block_hdr(bh);
1139 5969 : neh->eh_entries = 0;
1140 5969 : neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
1141 5969 : neh->eh_magic = EXT4_EXT_MAGIC;
1142 5969 : neh->eh_depth = 0;
1143 5969 : neh->eh_generation = 0;
1144 :
1145 : /* move remainder of path[depth] to the new leaf */
1146 5969 : if (unlikely(path[depth].p_hdr->eh_entries !=
1147 : path[depth].p_hdr->eh_max)) {
1148 0 : EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
1149 : path[depth].p_hdr->eh_entries,
1150 : path[depth].p_hdr->eh_max);
1151 0 : err = -EFSCORRUPTED;
1152 0 : goto cleanup;
1153 : }
1154 : /* start copy from next extent */
1155 5969 : m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++;
1156 5969 : ext4_ext_show_move(inode, path, newblock, depth);
1157 5969 : if (m) {
1158 5828 : struct ext4_extent *ex;
1159 5828 : ex = EXT_FIRST_EXTENT(neh);
1160 11656 : memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m);
1161 5828 : le16_add_cpu(&neh->eh_entries, m);
1162 : }
1163 :
1164 : /* zero out unused area in the extent block */
1165 5969 : ext_size = sizeof(struct ext4_extent_header) +
1166 5969 : sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries);
1167 5969 : memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size);
1168 5969 : ext4_extent_block_csum_set(inode, neh);
1169 5969 : set_buffer_uptodate(bh);
1170 5969 : unlock_buffer(bh);
1171 :
1172 5969 : err = ext4_handle_dirty_metadata(handle, inode, bh);
1173 5969 : if (err)
1174 0 : goto cleanup;
1175 5969 : brelse(bh);
1176 5969 : bh = NULL;
1177 :
1178 : /* correct old leaf */
1179 5969 : if (m) {
1180 5828 : err = ext4_ext_get_access(handle, inode, path + depth);
1181 5828 : if (err)
1182 0 : goto cleanup;
1183 5828 : le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
1184 5828 : err = ext4_ext_dirty(handle, inode, path + depth);
1185 5828 : if (err)
1186 0 : goto cleanup;
1187 :
1188 : }
1189 :
1190 : /* create intermediate indexes */
1191 5969 : k = depth - at - 1;
1192 5969 : if (unlikely(k < 0)) {
1193 0 : EXT4_ERROR_INODE(inode, "k %d < 0!", k);
1194 0 : err = -EFSCORRUPTED;
1195 0 : goto cleanup;
1196 : }
1197 5969 : if (k)
1198 : ext_debug(inode, "create %d intermediate indices\n", k);
1199 : /* insert new index into current index block */
1200 : /* current depth stored in i var */
1201 5969 : i = depth - 1;
1202 5978 : while (k--) {
1203 9 : oldblock = newblock;
1204 9 : newblock = ablocks[--a];
1205 9 : bh = sb_getblk(inode->i_sb, newblock);
1206 9 : if (unlikely(!bh)) {
1207 0 : err = -ENOMEM;
1208 0 : goto cleanup;
1209 : }
1210 9 : lock_buffer(bh);
1211 :
1212 9 : err = ext4_journal_get_create_access(handle, inode->i_sb, bh,
1213 : EXT4_JTR_NONE);
1214 9 : if (err)
1215 0 : goto cleanup;
1216 :
1217 9 : neh = ext_block_hdr(bh);
1218 9 : neh->eh_entries = cpu_to_le16(1);
1219 9 : neh->eh_magic = EXT4_EXT_MAGIC;
1220 9 : neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
1221 9 : neh->eh_depth = cpu_to_le16(depth - i);
1222 9 : neh->eh_generation = 0;
1223 9 : fidx = EXT_FIRST_INDEX(neh);
1224 9 : fidx->ei_block = border;
1225 9 : ext4_idx_store_pblock(fidx, oldblock);
1226 :
1227 9 : ext_debug(inode, "int.index at %d (block %llu): %u -> %llu\n",
1228 : i, newblock, le32_to_cpu(border), oldblock);
1229 :
1230 : /* move remainder of path[i] to the new index block */
1231 9 : if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
1232 : EXT_LAST_INDEX(path[i].p_hdr))) {
1233 0 : EXT4_ERROR_INODE(inode,
1234 : "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
1235 : le32_to_cpu(path[i].p_ext->ee_block));
1236 0 : err = -EFSCORRUPTED;
1237 0 : goto cleanup;
1238 : }
1239 : /* start copy indexes */
1240 9 : m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++;
1241 9 : ext_debug(inode, "cur 0x%p, last 0x%p\n", path[i].p_idx,
1242 : EXT_MAX_INDEX(path[i].p_hdr));
1243 9 : ext4_ext_show_move(inode, path, newblock, i);
1244 9 : if (m) {
1245 9 : memmove(++fidx, path[i].p_idx,
1246 : sizeof(struct ext4_extent_idx) * m);
1247 9 : le16_add_cpu(&neh->eh_entries, m);
1248 : }
1249 : /* zero out unused area in the extent block */
1250 9 : ext_size = sizeof(struct ext4_extent_header) +
1251 9 : (sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries));
1252 9 : memset(bh->b_data + ext_size, 0,
1253 : inode->i_sb->s_blocksize - ext_size);
1254 9 : ext4_extent_block_csum_set(inode, neh);
1255 9 : set_buffer_uptodate(bh);
1256 9 : unlock_buffer(bh);
1257 :
1258 9 : err = ext4_handle_dirty_metadata(handle, inode, bh);
1259 9 : if (err)
1260 0 : goto cleanup;
1261 9 : brelse(bh);
1262 9 : bh = NULL;
1263 :
1264 : /* correct old index */
1265 9 : if (m) {
1266 9 : err = ext4_ext_get_access(handle, inode, path + i);
1267 9 : if (err)
1268 0 : goto cleanup;
1269 9 : le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
1270 9 : err = ext4_ext_dirty(handle, inode, path + i);
1271 9 : if (err)
1272 0 : goto cleanup;
1273 : }
1274 :
1275 9 : i--;
1276 : }
1277 :
1278 : /* insert new index */
1279 5969 : err = ext4_ext_insert_index(handle, inode, path + at,
1280 : le32_to_cpu(border), newblock);
1281 :
1282 0 : cleanup:
1283 5969 : if (bh) {
1284 0 : if (buffer_locked(bh))
1285 0 : unlock_buffer(bh);
1286 0 : brelse(bh);
1287 : }
1288 :
1289 5969 : if (err) {
1290 : /* free all allocated blocks in error case */
1291 0 : for (i = 0; i < depth; i++) {
1292 0 : if (!ablocks[i])
1293 0 : continue;
1294 0 : ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
1295 : EXT4_FREE_BLOCKS_METADATA);
1296 : }
1297 : }
1298 5969 : kfree(ablocks);
1299 :
1300 5969 : return err;
1301 : }
1302 :
1303 : /*
1304 : * ext4_ext_grow_indepth:
1305 : * implements tree growing procedure:
1306 : * - allocates new block
1307 : * - moves top-level data (index block or leaf) into the new block
1308 : * - initializes new top-level, creating index that points to the
1309 : * just created block
1310 : */
1311 63961 : static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1312 : unsigned int flags)
1313 : {
1314 63961 : struct ext4_extent_header *neh;
1315 63961 : struct buffer_head *bh;
1316 63961 : ext4_fsblk_t newblock, goal = 0;
1317 63961 : struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
1318 63961 : int err = 0;
1319 63961 : size_t ext_size = 0;
1320 :
1321 : /* Try to prepend new index to old one */
1322 63961 : if (ext_depth(inode))
1323 49 : goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode)));
1324 63961 : if (goal > le32_to_cpu(es->s_first_data_block)) {
1325 49 : flags |= EXT4_MB_HINT_TRY_GOAL;
1326 49 : goal--;
1327 : } else
1328 63912 : goal = ext4_inode_to_goal_block(inode);
1329 63961 : newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
1330 : NULL, &err);
1331 63961 : if (newblock == 0)
1332 276 : return err;
1333 :
1334 63685 : bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
1335 63685 : if (unlikely(!bh))
1336 : return -ENOMEM;
1337 63685 : lock_buffer(bh);
1338 :
1339 63685 : err = ext4_journal_get_create_access(handle, inode->i_sb, bh,
1340 : EXT4_JTR_NONE);
1341 63683 : if (err) {
1342 0 : unlock_buffer(bh);
1343 0 : goto out;
1344 : }
1345 :
1346 63683 : ext_size = sizeof(EXT4_I(inode)->i_data);
1347 : /* move top-level index/leaf into new block */
1348 127366 : memmove(bh->b_data, EXT4_I(inode)->i_data, ext_size);
1349 : /* zero out unused area in the extent block */
1350 63683 : memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size);
1351 :
1352 : /* set size of new block */
1353 63683 : neh = ext_block_hdr(bh);
1354 : /* the old root could have indexes or leaves,
1355 : * so calculate eh_max the right way */
1356 63683 : if (ext_depth(inode))
1357 49 : neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
1358 : else
1359 63634 : neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
1360 63683 : neh->eh_magic = EXT4_EXT_MAGIC;
1361 63683 : ext4_extent_block_csum_set(inode, neh);
1362 63683 : set_buffer_uptodate(bh);
1363 63685 : set_buffer_verified(bh);
1364 63683 : unlock_buffer(bh);
1365 :
1366 63684 : err = ext4_handle_dirty_metadata(handle, inode, bh);
1367 63685 : if (err)
1368 0 : goto out;
1369 :
1370 : /* Update top-level index: num,max,pointer */
1371 63685 : neh = ext_inode_hdr(inode);
1372 63685 : neh->eh_entries = cpu_to_le16(1);
1373 63685 : ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1374 63685 : if (neh->eh_depth == 0) {
1375 : /* Root extent block becomes index block */
1376 63636 : neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
1377 63636 : EXT_FIRST_INDEX(neh)->ei_block =
1378 : EXT_FIRST_EXTENT(neh)->ee_block;
1379 : }
1380 63685 : ext_debug(inode, "new root: num %d(%d), lblock %d, ptr %llu\n",
1381 : le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
1382 : le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
1383 : ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
1384 :
1385 63685 : le16_add_cpu(&neh->eh_depth, 1);
1386 63685 : err = ext4_mark_inode_dirty(handle, inode);
1387 63685 : out:
1388 63685 : brelse(bh);
1389 :
1390 63684 : return err;
1391 : }
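
Note that growing never moves individual entries around: the whole in-inode root is copied verbatim into the fresh block and the root is rewritten as a single index pointing at it, with eh_depth bumped by one. A sketch of the before/after shape (illustration, not kernel code):

/*
 * before:  inode i_data (root):  [ E1 E2 E3 E4 ]        depth = 0
 *
 * after:   inode i_data (root):  [ IDX -> newblock ]    depth = 1
 *          newblock:             [ E1 E2 E3 E4 ]
 */
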
1392 :
1393 : /*
1394 : * ext4_ext_create_new_leaf:
1395 : * finds an empty index and adds a new leaf.
1396 : * if no free index is found, then it requests in-depth growing.
1397 : */
1398 69881 : static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
1399 : unsigned int mb_flags,
1400 : unsigned int gb_flags,
1401 : struct ext4_ext_path **ppath,
1402 : struct ext4_extent *newext)
1403 : {
1404 69881 : struct ext4_ext_path *path = *ppath;
1405 69881 : struct ext4_ext_path *curp;
1406 69881 : int depth, i, err = 0;
1407 :
1408 69930 : repeat:
1409 69930 : i = depth = ext_depth(inode);
1410 :
1411 : /* walk up the tree and look for a free index entry */
1412 69930 : curp = path + depth;
1413 75958 : while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
1414 6028 : i--;
1415 6028 : curp--;
1416 : }
1417 :
1418 : /* we use an already allocated block for the index block,
1419 : * so subsequent data blocks should be contiguous */
1420 69930 : if (EXT_HAS_FREE_INDEX(curp)) {
1421 : /* if we found index with free entry, then use that
1422 : * entry: create all needed subtree and add new leaf */
1423 5969 : err = ext4_ext_split(handle, inode, mb_flags, path, newext, i);
1424 5969 : if (err)
1425 0 : goto out;
1426 :
1427 : /* refill path */
1428 5969 : path = ext4_find_extent(inode,
1429 5969 : (ext4_lblk_t)le32_to_cpu(newext->ee_block),
1430 : ppath, gb_flags);
1431 5969 : if (IS_ERR(path))
1432 0 : err = PTR_ERR(path);
1433 : } else {
1434 : /* tree is full, time to grow in depth */
1435 63961 : err = ext4_ext_grow_indepth(handle, inode, mb_flags);
1436 63961 : if (err)
1437 276 : goto out;
1438 :
1439 : /* refill path */
1440 63685 : path = ext4_find_extent(inode,
1441 63685 : (ext4_lblk_t)le32_to_cpu(newext->ee_block),
1442 : ppath, gb_flags);
1443 63683 : if (IS_ERR(path)) {
1444 0 : err = PTR_ERR(path);
1445 0 : goto out;
1446 : }
1447 :
1448 : /*
1449 : * only the first grow (depth 0 -> 1) produces free space;
1450 : * in all other cases we have to split the grown tree
1451 : */
1452 63683 : depth = ext_depth(inode);
1453 63683 : if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
1454 : /* now we need to split */
1455 49 : goto repeat;
1456 : }
1457 : }
1458 :
1459 63634 : out:
1460 69879 : return err;
1461 : }
1462 :
1463 : /*
1464 : * search for the closest allocated block to the left of *logical
1465 : * and return it at @logical, plus its physical address at @phys.
1466 : * if *logical is the smallest allocated block, the function
1467 : * returns 0 at @phys.
1468 : * the return value is 0 (success) or an error code.
1469 : */
1470 3994862 : static int ext4_ext_search_left(struct inode *inode,
1471 : struct ext4_ext_path *path,
1472 : ext4_lblk_t *logical, ext4_fsblk_t *phys)
1473 : {
1474 3994862 : struct ext4_extent_idx *ix;
1475 3994862 : struct ext4_extent *ex;
1476 3994862 : int depth, ee_len;
1477 :
1478 3994862 : if (unlikely(path == NULL)) {
1479 0 : EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
1480 0 : return -EFSCORRUPTED;
1481 : }
1482 3994862 : depth = path->p_depth;
1483 3994862 : *phys = 0;
1484 :
1485 3994862 : if (depth == 0 && path->p_ext == NULL)
1486 : return 0;
1487 :
1488 : /* usually the extent in the path covers blocks smaller
1489 : * than *logical, but it may be that the extent is the
1490 : * first one in the file */
1491 :
1492 3133304 : ex = path[depth].p_ext;
1493 3133304 : ee_len = ext4_ext_get_actual_len(ex);
1494 3133304 : if (*logical < le32_to_cpu(ex->ee_block)) {
1495 77993 : if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
1496 0 : EXT4_ERROR_INODE(inode,
1497 : "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
1498 : *logical, le32_to_cpu(ex->ee_block));
1499 0 : return -EFSCORRUPTED;
1500 : }
1501 102868 : while (--depth >= 0) {
1502 24875 : ix = path[depth].p_idx;
1503 24875 : if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
1504 0 : EXT4_ERROR_INODE(inode,
1505 : "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
1506 : ix != NULL ? le32_to_cpu(ix->ei_block) : 0,
1507 : le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block),
1508 : depth);
1509 0 : return -EFSCORRUPTED;
1510 : }
1511 : }
1512 : return 0;
1513 : }
1514 :
1515 3055311 : if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
1516 0 : EXT4_ERROR_INODE(inode,
1517 : "logical %d < ee_block %d + ee_len %d!",
1518 : *logical, le32_to_cpu(ex->ee_block), ee_len);
1519 0 : return -EFSCORRUPTED;
1520 : }
1521 :
1522 3055311 : *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
1523 3055311 : *phys = ext4_ext_pblock(ex) + ee_len - 1;
1524 3055311 : return 0;
1525 : }
1526 :
1527 : /*
1528 : * Search for the closest allocated block to the right of *logical
1529 : * and return it at @logical, plus its physical address at @phys.
1530 : * If none exists, return 0 and set @phys to 0. We return 1 when
1531 : * an allocated block was found, in which case ret_ex is valid.
1532 : * Otherwise return a (< 0) error code.
1533 : */
1534 3994584 : static int ext4_ext_search_right(struct inode *inode,
1535 : struct ext4_ext_path *path,
1536 : ext4_lblk_t *logical, ext4_fsblk_t *phys,
1537 : struct ext4_extent *ret_ex)
1538 : {
1539 3994584 : struct buffer_head *bh = NULL;
1540 3994584 : struct ext4_extent_header *eh;
1541 3994584 : struct ext4_extent_idx *ix;
1542 3994584 : struct ext4_extent *ex;
1543 3994584 : int depth; /* Note, NOT eh_depth; depth from top of tree */
1544 3994584 : int ee_len;
1545 :
1546 3994584 : if (unlikely(path == NULL)) {
1547 0 : EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
1548 0 : return -EFSCORRUPTED;
1549 : }
1550 3994584 : depth = path->p_depth;
1551 3994584 : *phys = 0;
1552 :
1553 3994584 : if (depth == 0 && path->p_ext == NULL)
1554 : return 0;
1555 :
1556 : /* usually the extent in the path covers blocks smaller
1557 : * than *logical, but it may be that the extent is the
1558 : * first one in the file */
1559 :
1560 3133069 : ex = path[depth].p_ext;
1561 3133069 : ee_len = ext4_ext_get_actual_len(ex);
1562 3133069 : if (*logical < le32_to_cpu(ex->ee_block)) {
1563 77992 : if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
1564 0 : EXT4_ERROR_INODE(inode,
1565 : "first_extent(path[%d].p_hdr) != ex",
1566 : depth);
1567 0 : return -EFSCORRUPTED;
1568 : }
1569 102867 : while (--depth >= 0) {
1570 24875 : ix = path[depth].p_idx;
1571 24875 : if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
1572 0 : EXT4_ERROR_INODE(inode,
1573 : "ix != EXT_FIRST_INDEX *logical %d!",
1574 : *logical);
1575 0 : return -EFSCORRUPTED;
1576 : }
1577 : }
1578 77992 : goto found_extent;
1579 : }
1580 :
1581 3055077 : if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
1582 0 : EXT4_ERROR_INODE(inode,
1583 : "logical %d < ee_block %d + ee_len %d!",
1584 : *logical, le32_to_cpu(ex->ee_block), ee_len);
1585 0 : return -EFSCORRUPTED;
1586 : }
1587 :
1588 3055077 : if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
1589 : /* next allocated block in this leaf */
1590 2009099 : ex++;
1591 2009099 : goto found_extent;
1592 : }
1593 :
1594 : /* go up and search for index to the right */
1595 1871740 : while (--depth >= 0) {
1596 851230 : ix = path[depth].p_idx;
1597 851230 : if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
1598 25468 : goto got_index;
1599 : }
1600 :
1601 : /* we've gone up to the root and found no index to the right */
1602 : return 0;
1603 :
1604 : got_index:
1605 : /* we've found an index to the right, let's
1606 : * follow it and find the closest allocated
1607 : * block to the right */
1608 25468 : ix++;
1609 25473 : while (++depth < path->p_depth) {
1610 : /* subtract from p_depth to get proper eh_depth */
1611 5 : bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0);
1612 5 : if (IS_ERR(bh))
1613 0 : return PTR_ERR(bh);
1614 5 : eh = ext_block_hdr(bh);
1615 5 : ix = EXT_FIRST_INDEX(eh);
1616 5 : put_bh(bh);
1617 : }
1618 :
1619 25468 : bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0);
1620 25468 : if (IS_ERR(bh))
1621 0 : return PTR_ERR(bh);
1622 25468 : eh = ext_block_hdr(bh);
1623 25468 : ex = EXT_FIRST_EXTENT(eh);
1624 2112559 : found_extent:
1625 2112559 : *logical = le32_to_cpu(ex->ee_block);
1626 2112559 : *phys = ext4_ext_pblock(ex);
1627 2112559 : if (ret_ex)
1628 2112559 : *ret_ex = *ex;
1629 2112559 : if (bh)
1630 25471 : put_bh(bh);
1631 : return 1;
1632 : }
1633 :
1634 : /*
1635 : * ext4_ext_next_allocated_block:
1636 : * returns the allocated block in the subsequent extent, or EXT_MAX_BLOCKS.
1637 : * NOTE: it treats the block number from an index entry as an
1638 : * allocated block. Thus, index entries have to be consistent
1639 : * with the leaves.
1640 : */
1641 : ext4_lblk_t
1642 4668053 : ext4_ext_next_allocated_block(struct ext4_ext_path *path)
1643 : {
1644 4668053 : int depth;
1645 :
1646 4668053 : BUG_ON(path == NULL);
1647 4668053 : depth = path->p_depth;
1648 :
1649 4668053 : if (depth == 0 && path->p_ext == NULL)
1650 : return EXT_MAX_BLOCKS;
1651 :
1652 7621106 : while (depth >= 0) {
1653 6003450 : struct ext4_ext_path *p = &path[depth];
1654 :
1655 6003450 : if (depth == path->p_depth) {
1656 : /* leaf */
1657 4668160 : if (p->p_ext && p->p_ext != EXT_LAST_EXTENT(p->p_hdr))
1658 3024730 : return le32_to_cpu(p->p_ext[1].ee_block);
1659 : } else {
1660 : /* index */
1661 1335290 : if (p->p_idx != EXT_LAST_INDEX(p->p_hdr))
1662 25667 : return le32_to_cpu(p->p_idx[1].ei_block);
1663 : }
1664 2953053 : depth--;
1665 : }
1666 :
1667 : return EXT_MAX_BLOCKS;
1668 : }
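
The scan climbs from the leaf toward the root and returns, at the first level that still has a right-hand sibling, that sibling's starting block. A toy model over per-level cursors (all names invented):

#include <stdint.h>

#define TOY_MAX_BLOCKS 0xffffffffU      /* stands in for EXT_MAX_BLOCKS */

/*
 * pos[d] is the current entry index at depth d, cnt[d] the entry count
 * there, and sib_start[d] the starting block of entry pos[d] + 1 when it
 * exists. Depth p_depth is the leaf, 0 the root, as in the path array.
 */
static uint32_t next_allocated_block(const int *pos, const int *cnt,
                                     const uint32_t *sib_start, int p_depth)
{
        for (int d = p_depth; d >= 0; d--)
                if (pos[d] + 1 < cnt[d])
                        return sib_start[d];
        return TOY_MAX_BLOCKS;
}
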
1669 :
1670 : /*
1671 : * ext4_ext_next_leaf_block:
1672 : * returns the first allocated block from the next leaf, or EXT_MAX_BLOCKS
1673 : */
1674 27478 : static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
1675 : {
1676 27478 : int depth;
1677 :
1678 27478 : BUG_ON(path == NULL);
1679 27478 : depth = path->p_depth;
1680 :
1681 : /* zero-tree has no leaf blocks at all */
1682 27478 : if (depth == 0)
1683 : return EXT_MAX_BLOCKS;
1684 :
1685 : /* go to index block */
1686 1440 : depth--;
1687 :
1688 1692 : while (depth >= 0) {
1689 1552 : if (path[depth].p_idx !=
1690 1552 : EXT_LAST_INDEX(path[depth].p_hdr))
1691 1300 : return (ext4_lblk_t)
1692 : le32_to_cpu(path[depth].p_idx[1].ei_block);
1693 252 : depth--;
1694 : }
1695 :
1696 : return EXT_MAX_BLOCKS;
1697 : }
1698 :
1699 : /*
1700 : * ext4_ext_correct_indexes:
1701 : * if the leaf gets modified and the modified extent is first in the
1702 : * leaf, then we have to correct all indexes above.
1703 : * TODO: do we need to correct tree in all cases?
1704 : */
1705 5316735 : static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
1706 : struct ext4_ext_path *path)
1707 : {
1708 5316735 : struct ext4_extent_header *eh;
1709 5316735 : int depth = ext_depth(inode);
1710 5316735 : struct ext4_extent *ex;
1711 5316735 : __le32 border;
1712 5316735 : int k, err = 0;
1713 :
1714 5316735 : eh = path[depth].p_hdr;
1715 5316735 : ex = path[depth].p_ext;
1716 :
1717 5316735 : if (unlikely(ex == NULL || eh == NULL)) {
1718 0 : EXT4_ERROR_INODE(inode,
1719 : "ex %p == NULL or eh %p == NULL", ex, eh);
1720 0 : return -EFSCORRUPTED;
1721 : }
1722 :
1723 5316735 : if (depth == 0) {
1724 : /* there is no tree at all */
1725 : return 0;
1726 : }
1727 :
1728 4084263 : if (ex != EXT_FIRST_EXTENT(eh)) {
1729 : /* we correct tree if first leaf got modified only */
1730 : return 0;
1731 : }
1732 :
1733 : /*
1734 : * TODO: we need correction if border is smaller than current one
1735 : */
1736 38882 : k = depth - 1;
1737 38882 : border = path[depth].p_ext->ee_block;
1738 38882 : err = ext4_ext_get_access(handle, inode, path + k);
1739 38882 : if (err)
1740 : return err;
1741 38882 : path[k].p_idx->ei_block = border;
1742 38882 : err = ext4_ext_dirty(handle, inode, path + k);
1743 38882 : if (err)
1744 : return err;
1745 :
1746 38916 : while (k--) {
1747 : /* change all left-side indexes */
1748 1588 : if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
1749 : break;
1750 34 : err = ext4_ext_get_access(handle, inode, path + k);
1751 34 : if (err)
1752 : break;
1753 34 : path[k].p_idx->ei_block = border;
1754 34 : err = ext4_ext_dirty(handle, inode, path + k);
1755 34 : if (err)
1756 : break;
1757 : }
1758 :
1759 : return err;
1760 : }
1761 :
1762 20148412 : static int ext4_can_extents_be_merged(struct inode *inode,
1763 : struct ext4_extent *ex1,
1764 : struct ext4_extent *ex2)
1765 : {
1766 20148412 : unsigned short ext1_ee_len, ext2_ee_len;
1767 :
1768 20148412 : if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2))
1769 : return 0;
1770 :
1771 17157125 : ext1_ee_len = ext4_ext_get_actual_len(ex1);
1772 17157125 : ext2_ee_len = ext4_ext_get_actual_len(ex2);
1773 :
1774 17157125 : if (le32_to_cpu(ex1->ee_block) + ext1_ee_len !=
1775 17157125 : le32_to_cpu(ex2->ee_block))
1776 : return 0;
1777 :
1778 14743585 : if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
1779 : return 0;
1780 :
1781 14679318 : if (ext4_ext_is_unwritten(ex1) &&
1782 : ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)
1783 : return 0;
1784 : #ifdef AGGRESSIVE_TEST
1785 : if (ext1_ee_len >= 4)
1786 : return 0;
1787 : #endif
1788 :
1789 14672701 : if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
1790 2035302 : return 1;
1791 : return 0;
1792 : }
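/*
 * Illustrative sketch, not part of this file: the predicate above
 * reduces to four integer checks.  The struct and names below are
 * hypothetical; the length caps mirror the kernel's 16-bit ee_len
 * encoding (EXT_INIT_MAX_LEN == 32768, EXT_UNWRITTEN_MAX_LEN == 32767).
 * It compiles as a standalone user-space program.
 */
#include <stdbool.h>
#include <stdio.h>

struct demo_extent {
	unsigned int lblk;        /* first logical block */
	unsigned long long pblk;  /* first physical block */
	unsigned int len;         /* number of blocks */
	bool unwritten;
};

static bool demo_can_merge(const struct demo_extent *e1,
			   const struct demo_extent *e2)
{
	unsigned int cap = e1->unwritten ? 32767 : 32768;

	if (e1->unwritten != e2->unwritten)
		return false;                       /* mixed states never merge */
	if (e1->lblk + e1->len != e2->lblk)
		return false;                       /* not logically contiguous */
	if (e1->len + e2->len > cap)
		return false;                       /* merged ee_len would not fit */
	return e1->pblk + e1->len == e2->pblk;      /* must be physically contiguous */
}

int main(void)
{
	struct demo_extent a = { 100, 5000, 50, false };
	struct demo_extent b = { 150, 5050, 20, false };

	printf("mergeable: %d\n", demo_can_merge(&a, &b)); /* prints 1 */
	return 0;
}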
1793 :
1794 : /*
1795 : * This function tries to merge the "ex" extent with the next extent in the tree.
1796 : * It always tries to merge towards the right. If you want to merge towards
1797 : * the left, pass "ex - 1" as the argument instead of "ex".
1798 : * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
1799 : * 1 if they got merged.
1800 : */
1801 18090023 : static int ext4_ext_try_to_merge_right(struct inode *inode,
1802 : struct ext4_ext_path *path,
1803 : struct ext4_extent *ex)
1804 : {
1805 18090023 : struct ext4_extent_header *eh;
1806 18090023 : unsigned int depth, len;
1807 18090023 : int merge_done = 0, unwritten;
1808 :
1809 18090023 : depth = ext_depth(inode);
1810 18090023 : BUG_ON(path[depth].p_hdr == NULL);
1811 : eh = path[depth].p_hdr;
1812 :
1813 19635598 : while (ex < EXT_LAST_EXTENT(eh)) {
1814 17634494 : if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
1815 : break;
1816 : /* merge with next extent! */
1817 1545094 : unwritten = ext4_ext_is_unwritten(ex);
1818 3090188 : ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
1819 : + ext4_ext_get_actual_len(ex + 1));
1820 1545094 : if (unwritten)
1821 1174677 : ext4_ext_mark_unwritten(ex);
1822 :
1823 1545094 : if (ex + 1 < EXT_LAST_EXTENT(eh)) {
1824 1444069 : len = (EXT_LAST_EXTENT(eh) - ex - 1)
1825 1444069 : * sizeof(struct ext4_extent);
1826 2888138 : memmove(ex + 1, ex + 2, len);
1827 : }
1828 1545094 : le16_add_cpu(&eh->eh_entries, -1);
1829 1545094 : merge_done = 1;
1830 1545094 : WARN_ON(eh->eh_entries == 0);
1831 1545575 : if (!eh->eh_entries)
1832 0 : EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");
1833 : }
1834 :
1835 18090504 : return merge_done;
1836 : }
1837 :
1838 : /*
1839 : * This function does a very simple check to see if we can collapse
1840 : * an extent tree with a single extent tree leaf block into the inode.
1841 : */
1842 5191509 : static void ext4_ext_try_to_merge_up(handle_t *handle,
1843 : struct inode *inode,
1844 : struct ext4_ext_path *path)
1845 : {
1846 5191509 : size_t s;
1847 5191509 : unsigned max_root = ext4_ext_space_root(inode, 0);
1848 5191509 : ext4_fsblk_t blk;
1849 :
1850 5191509 : if ((path[0].p_depth != 1) ||
1851 2380946 : (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
1852 2262191 : (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
1853 : return;
1854 :
1855 : /*
1856 : * We need to modify the block allocation bitmap and the block
1857 : * group descriptor to release the extent tree block. If we
1858 : * can't get the journal credits, give up.
1859 : */
1860 84166 : if (ext4_journal_extend(handle, 2,
1861 : ext4_free_metadata_revoke_credits(inode->i_sb, 1)))
1862 : return;
1863 :
1864 : /*
1865 : * Copy the extent data up to the inode
1866 : */
1867 42026 : blk = ext4_idx_pblock(path[0].p_idx);
1868 42026 : s = le16_to_cpu(path[1].p_hdr->eh_entries) *
1869 : sizeof(struct ext4_extent_idx);
1870 42026 : s += sizeof(struct ext4_extent_header);
1871 :
1872 42026 : path[1].p_maxdepth = path[0].p_maxdepth;
1873 84052 : memcpy(path[0].p_hdr, path[1].p_hdr, s);
1874 42026 : path[0].p_depth = 0;
1875 42026 : path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
1876 42026 : (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
1877 42026 : path[0].p_hdr->eh_max = cpu_to_le16(max_root);
1878 :
1879 42026 : brelse(path[1].p_bh);
1880 42026 : ext4_free_blocks(handle, inode, NULL, blk, 1,
1881 : EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
1882 : }
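/*
 * Worked example (hypothetical numbers, for illustration): with the
 * standard 60-byte i_data area the root holds at most 4 extents, so a
 * depth-1 tree whose single leaf block has shrunk to <= 4 entries
 * qualifies.  The leaf's header and entries are copied over the root,
 * p_depth drops to 0, eh_max is reset to the root capacity, and the
 * now-unused leaf block is returned to the allocator with the
 * METADATA | FORGET flags.
 */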
1883 :
1884 : /*
1885 : * This function tries to merge the @ex extent with its neighbours in the tree, then
1886 : * tries to collapse the extent tree into the inode.
1887 : */
1888 5191711 : static void ext4_ext_try_to_merge(handle_t *handle,
1889 : struct inode *inode,
1890 : struct ext4_ext_path *path,
1891 : struct ext4_extent *ex)
1892 : {
1893 5191711 : struct ext4_extent_header *eh;
1894 5191711 : unsigned int depth;
1895 5191711 : int merge_done = 0;
1896 :
1897 5191711 : depth = ext_depth(inode);
1898 5191711 : BUG_ON(path[depth].p_hdr == NULL);
1899 5191711 : eh = path[depth].p_hdr;
1900 :
1901 5191711 : if (ex > EXT_FIRST_EXTENT(eh))
1902 4219077 : merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
1903 :
1904 4219080 : if (!merge_done)
1905 3759515 : (void) ext4_ext_try_to_merge_right(inode, path, ex);
1906 :
1907 5191514 : ext4_ext_try_to_merge_up(handle, inode, path);
1908 5191550 : }
1909 :
1910 : /*
1911 : * check if a portion of the "newext" extent overlaps with an
1912 : * existing extent.
1913 : *
1914 : * If there is an overlap discovered, it updates the length of the newext
1915 : * such that there will be no overlap, and then returns 1.
1916 : * If there is no overlap found, it returns 0.
1917 : */
1918 3994240 : static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
1919 : struct inode *inode,
1920 : struct ext4_extent *newext,
1921 : struct ext4_ext_path *path)
1922 : {
1923 3994240 : ext4_lblk_t b1, b2;
1924 3994240 : unsigned int depth, len1;
1925 3994240 : unsigned int ret = 0;
1926 :
1927 3994240 : b1 = le32_to_cpu(newext->ee_block);
1928 3994240 : len1 = ext4_ext_get_actual_len(newext);
1929 3994240 : depth = ext_depth(inode);
1930 3994240 : if (!path[depth].p_ext)
1931 861488 : goto out;
1932 3132752 : b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block));
1933 :
1934 : /*
1935 : * get the next allocated block if the extent in the path
1936 : * is before the requested block(s)
1937 : */
1938 3132752 : if (b2 < b1) {
1939 3054775 : b2 = ext4_ext_next_allocated_block(path);
1940 3054125 : if (b2 == EXT_MAX_BLOCKS)
1941 1019644 : goto out;
1942 2034481 : b2 = EXT4_LBLK_CMASK(sbi, b2);
1943 : }
1944 :
1945 : /* check for wrap through zero on extent logical start block*/
1946 2112458 : if (b1 + len1 < b1) {
1947 0 : len1 = EXT_MAX_BLOCKS - b1;
1948 0 : newext->ee_len = cpu_to_le16(len1);
1949 0 : ret = 1;
1950 : }
1951 :
1952 : /* check for overlap */
1953 2112458 : if (b1 + len1 > b2) {
1954 0 : newext->ee_len = cpu_to_le16(b2 - b1);
1955 0 : ret = 1;
1956 : }
1957 2112458 : out:
1958 3993590 : return ret;
1959 : }
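/*
 * Worked example (hypothetical numbers, for illustration, assuming a
 * non-bigalloc filesystem where the cluster masking is an identity):
 * newext covers logical blocks 100..149 (b1 == 100, len1 == 50) and the
 * next allocated block is b2 == 120.  Since b1 + len1 == 150 > b2, the
 * extent is clipped to ee_len == b2 - b1 == 20 blocks (100..119) and
 * the function returns 1; the caller then allocates only the
 * non-overlapping range.
 */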
1960 :
1961 : /*
1962 : * ext4_ext_insert_extent:
1963 : * tries to merge requested extent into the existing extent or
1964 : * inserts requested extent as new one into the tree,
1965 : * creating new leaf in the no-space case.
1966 : */
1967 5294844 : int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1968 : struct ext4_ext_path **ppath,
1969 : struct ext4_extent *newext, int gb_flags)
1970 : {
1971 5294844 : struct ext4_ext_path *path = *ppath;
1972 5294844 : struct ext4_extent_header *eh;
1973 5294844 : struct ext4_extent *ex, *fex;
1974 5294844 : struct ext4_extent *nearex; /* nearest extent */
1975 5294844 : struct ext4_ext_path *npath = NULL;
1976 5294844 : int depth, len, err;
1977 5294844 : ext4_lblk_t next;
1978 5294844 : int mb_flags = 0, unwritten;
1979 :
1980 5294844 : if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
1981 883577 : mb_flags |= EXT4_MB_DELALLOC_RESERVED;
1982 10589688 : if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
1983 0 : EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
1984 0 : return -EFSCORRUPTED;
1985 : }
1986 5294844 : depth = ext_depth(inode);
1987 5294844 : ex = path[depth].p_ext;
1988 5294844 : eh = path[depth].p_hdr;
1989 5294844 : if (unlikely(path[depth].p_hdr == NULL)) {
1990 0 : EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
1991 0 : return -EFSCORRUPTED;
1992 : }
1993 :
1994 : /* try to insert block into found extent and return */
1995 5294844 : if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) {
1996 :
1997 : /*
1998 : * Try to see whether we should rather test the extent to
1999 : * the right of ex, or to the left of ex. This is because
2000 : * ext4_find_extent() can return either the extent on the
2001 : * left, or the one on the right of the searched position. This
2002 : * will make merging more effective.
2003 : */
2004 1570425 : if (ex < EXT_LAST_EXTENT(eh) &&
2005 732536 : (le32_to_cpu(ex->ee_block) +
2006 732536 : ext4_ext_get_actual_len(ex) <
2007 732536 : le32_to_cpu(newext->ee_block))) {
2008 137754 : ex += 1;
2009 137754 : goto prepend;
2010 1432671 : } else if ((ex > EXT_FIRST_EXTENT(eh)) &&
2011 1329453 : (le32_to_cpu(newext->ee_block) +
2012 1329453 : ext4_ext_get_actual_len(newext) <
2013 1329453 : le32_to_cpu(ex->ee_block)))
2014 0 : ex -= 1;
2015 :
2016 : /* Try to append newex to the ex */
2017 1432671 : if (ext4_can_extents_be_merged(inode, ex, newext)) {
2018 488977 : ext_debug(inode, "append [%d]%d block to %u:[%d]%d"
2019 : "(from %llu)\n",
2020 : ext4_ext_is_unwritten(newext),
2021 : ext4_ext_get_actual_len(newext),
2022 : le32_to_cpu(ex->ee_block),
2023 : ext4_ext_is_unwritten(ex),
2024 : ext4_ext_get_actual_len(ex),
2025 : ext4_ext_pblock(ex));
2026 488977 : err = ext4_ext_get_access(handle, inode,
2027 : path + depth);
2028 489315 : if (err)
2029 : return err;
2030 489315 : unwritten = ext4_ext_is_unwritten(ex);
2031 978630 : ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
2032 : + ext4_ext_get_actual_len(newext));
2033 489315 : if (unwritten)
2034 351674 : ext4_ext_mark_unwritten(ex);
2035 489315 : nearex = ex;
2036 489315 : goto merge;
2037 : }
2038 :
2039 943694 : prepend:
2040 : /* Try to prepend newex to the ex */
2041 1081448 : if (ext4_can_extents_be_merged(inode, newext, ex)) {
2042 1173 : ext_debug(inode, "prepend %u[%d]%d block to %u:[%d]%d"
2043 : "(from %llu)\n",
2044 : le32_to_cpu(newext->ee_block),
2045 : ext4_ext_is_unwritten(newext),
2046 : ext4_ext_get_actual_len(newext),
2047 : le32_to_cpu(ex->ee_block),
2048 : ext4_ext_is_unwritten(ex),
2049 : ext4_ext_get_actual_len(ex),
2050 : ext4_ext_pblock(ex));
2051 1173 : err = ext4_ext_get_access(handle, inode,
2052 : path + depth);
2053 1173 : if (err)
2054 : return err;
2055 :
2056 1173 : unwritten = ext4_ext_is_unwritten(ex);
2057 1173 : ex->ee_block = newext->ee_block;
2058 1173 : ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
2059 2346 : ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
2060 : + ext4_ext_get_actual_len(newext));
2061 1173 : if (unwritten)
2062 1173 : ext4_ext_mark_unwritten(ex);
2063 1173 : nearex = ex;
2064 1173 : goto merge;
2065 : }
2066 : }
2067 :
2068 4804694 : depth = ext_depth(inode);
2069 4804694 : eh = path[depth].p_hdr;
2070 4804694 : if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
2071 4733513 : goto has_space;
2072 :
2073 : /* probably next leaf has space for us? */
2074 71181 : fex = EXT_LAST_EXTENT(eh);
2075 71181 : next = EXT_MAX_BLOCKS;
2076 71181 : if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
2077 27478 : next = ext4_ext_next_leaf_block(path);
2078 27477 : if (next != EXT_MAX_BLOCKS) {
2079 1300 : ext_debug(inode, "next leaf block - %u\n", next);
2080 1300 : BUG_ON(npath != NULL);
2081 1300 : npath = ext4_find_extent(inode, next, NULL, gb_flags);
2082 1300 : if (IS_ERR(npath))
2083 0 : return PTR_ERR(npath);
2084 1300 : BUG_ON(npath->p_depth != path->p_depth);
2085 1300 : eh = npath[depth].p_hdr;
2086 1300 : if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
2087 1299 : ext_debug(inode, "next leaf isn't full(%d)\n",
2088 : le16_to_cpu(eh->eh_entries));
2089 1299 : path = npath;
2090 1299 : goto has_space;
2091 : }
2092 : ext_debug(inode, "next leaf has no free space(%d,%d)\n",
2093 : le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
2094 : }
2095 :
2096 : /*
2097 : * There is no free space in the found leaf.
2098 : * We're going to add a new leaf to the tree.
2099 : */
2100 69881 : if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
2101 33810 : mb_flags |= EXT4_MB_USE_RESERVED;
2102 69881 : err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
2103 : ppath, newext);
2104 69879 : if (err)
2105 276 : goto cleanup;
2106 69603 : depth = ext_depth(inode);
2107 69603 : eh = path[depth].p_hdr;
2108 :
2109 4804415 : has_space:
2110 4804415 : nearex = path[depth].p_ext;
2111 :
2112 4804415 : err = ext4_ext_get_access(handle, inode, path + depth);
2113 4804470 : if (err)
2114 0 : goto cleanup;
2115 :
2116 4804470 : if (!nearex) {
2117 : /* there is no extent in this leaf, create first one */
2118 808947 : ext_debug(inode, "first extent in the leaf: %u:%llu:[%d]%d\n",
2119 : le32_to_cpu(newext->ee_block),
2120 : ext4_ext_pblock(newext),
2121 : ext4_ext_is_unwritten(newext),
2122 : ext4_ext_get_actual_len(newext));
2123 808947 : nearex = EXT_FIRST_EXTENT(eh);
2124 : } else {
2125 3995523 : if (le32_to_cpu(newext->ee_block)
2126 3995523 : > le32_to_cpu(nearex->ee_block)) {
2127 : /* Insert after */
2128 3926917 : ext_debug(inode, "insert %u:%llu:[%d]%d after: "
2129 : "nearest %p\n",
2130 : le32_to_cpu(newext->ee_block),
2131 : ext4_ext_pblock(newext),
2132 : ext4_ext_is_unwritten(newext),
2133 : ext4_ext_get_actual_len(newext),
2134 : nearex);
2135 3926917 : nearex++;
2136 : } else {
2137 : /* Insert before */
2138 68606 : BUG_ON(newext->ee_block == nearex->ee_block);
2139 : ext_debug(inode, "insert %u:%llu:[%d]%d before: "
2140 : "nearest %p\n",
2141 : le32_to_cpu(newext->ee_block),
2142 : ext4_ext_pblock(newext),
2143 : ext4_ext_is_unwritten(newext),
2144 : ext4_ext_get_actual_len(newext),
2145 : nearex);
2146 : }
2147 3995523 : len = EXT_LAST_EXTENT(eh) - nearex + 1;
2148 3995523 : if (len > 0) {
2149 3220954 : ext_debug(inode, "insert %u:%llu:[%d]%d: "
2150 : "move %d extents from 0x%p to 0x%p\n",
2151 : le32_to_cpu(newext->ee_block),
2152 : ext4_ext_pblock(newext),
2153 : ext4_ext_is_unwritten(newext),
2154 : ext4_ext_get_actual_len(newext),
2155 : len, nearex, nearex + 1);
2156 6441908 : memmove(nearex + 1, nearex,
2157 : len * sizeof(struct ext4_extent));
2158 : }
2159 : }
2160 :
2161 4804470 : le16_add_cpu(&eh->eh_entries, 1);
2162 4804470 : path[depth].p_ext = nearex;
2163 4804470 : nearex->ee_block = newext->ee_block;
2164 4804470 : ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
2165 4804470 : nearex->ee_len = newext->ee_len;
2166 :
2167 5294958 : merge:
2168 : /* try to merge extents */
2169 5294958 : if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO))
2170 2071462 : ext4_ext_try_to_merge(handle, inode, path, nearex);
2171 :
2172 :
2173 : /* time to correct all indexes above */
2174 5294821 : err = ext4_ext_correct_indexes(handle, inode, path);
2175 5294352 : if (err)
2176 0 : goto cleanup;
2177 :
2178 5294352 : err = ext4_ext_dirty(handle, inode, path + path->p_depth);
2179 :
2180 5295605 : cleanup:
2181 5295605 : ext4_free_ext_path(npath);
2182 5295487 : return err;
2183 : }
2184 :
2185 0 : static int ext4_fill_es_cache_info(struct inode *inode,
2186 : ext4_lblk_t block, ext4_lblk_t num,
2187 : struct fiemap_extent_info *fieinfo)
2188 : {
2189 0 : ext4_lblk_t next, end = block + num - 1;
2190 0 : struct extent_status es;
2191 0 : unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
2192 0 : unsigned int flags;
2193 0 : int err;
2194 :
2195 0 : while (block <= end) {
2196 0 : next = 0;
2197 0 : flags = 0;
2198 0 : if (!ext4_es_lookup_extent(inode, block, &next, &es))
2199 : break;
2200 0 : if (ext4_es_is_unwritten(&es))
2201 0 : flags |= FIEMAP_EXTENT_UNWRITTEN;
2202 0 : if (ext4_es_is_delayed(&es))
2203 0 : flags |= (FIEMAP_EXTENT_DELALLOC |
2204 : FIEMAP_EXTENT_UNKNOWN);
2205 0 : if (ext4_es_is_hole(&es))
2206 0 : flags |= EXT4_FIEMAP_EXTENT_HOLE;
2207 0 : if (next == 0)
2208 0 : flags |= FIEMAP_EXTENT_LAST;
2209 0 : if (flags & (FIEMAP_EXTENT_DELALLOC|
2210 : EXT4_FIEMAP_EXTENT_HOLE))
2211 0 : es.es_pblk = 0;
2212 : else
2213 0 : es.es_pblk = ext4_es_pblock(&es);
2214 0 : err = fiemap_fill_next_extent(fieinfo,
2215 0 : (__u64)es.es_lblk << blksize_bits,
2216 0 : (__u64)es.es_pblk << blksize_bits,
2217 0 : (__u64)es.es_len << blksize_bits,
2218 : flags);
2219 0 : if (next == 0)
2220 : break;
2221 0 : block = next;
2222 0 : if (err < 0)
2223 0 : return err;
2224 0 : if (err == 1)
2225 : return 0;
2226 : }
2227 : return 0;
2228 : }
2229 :
2230 :
2231 : /*
2232 : * ext4_ext_determine_hole - determine hole around given block
2233 : * @inode: inode we lookup in
2234 : * @path: path in extent tree to @lblk
2235 : * @lblk: pointer to logical block around which we want to determine hole
2236 : *
2237 : * Determine hole length (and start if easily possible) around given logical
2238 : * block. We don't try too hard to find the beginning of the hole, but when
2239 : * @path actually points to the extent before @lblk, we provide it.
2240 : *
2241 : * The function returns the length of a hole starting at @lblk. We update @lblk
2242 : * to the beginning of the hole if we managed to find it.
2243 : */
2244 3495871 : static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
2245 : struct ext4_ext_path *path,
2246 : ext4_lblk_t *lblk)
2247 : {
2248 3495871 : int depth = ext_depth(inode);
2249 3495871 : struct ext4_extent *ex;
2250 3495871 : ext4_lblk_t len;
2251 :
2252 3495871 : ex = path[depth].p_ext;
2253 3495871 : if (ex == NULL) {
2254 : /* there is no extent yet, so gap is [0;-] */
2255 1874151 : *lblk = 0;
2256 1874151 : len = EXT_MAX_BLOCKS;
2257 1621720 : } else if (*lblk < le32_to_cpu(ex->ee_block)) {
2258 20289 : len = le32_to_cpu(ex->ee_block) - *lblk;
2259 1601431 : } else if (*lblk >= le32_to_cpu(ex->ee_block)
2260 1601431 : + ext4_ext_get_actual_len(ex)) {
2261 1601431 : ext4_lblk_t next;
2262 :
2263 1601431 : *lblk = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
2264 1601431 : next = ext4_ext_next_allocated_block(path);
2265 1601379 : BUG_ON(next == *lblk);
2266 1601379 : len = next - *lblk;
2267 : } else {
2268 0 : BUG();
2269 : }
2270 3495819 : return len;
2271 : }
2272 :
2273 : /*
2274 : * ext4_ext_put_gap_in_cache:
2275 : * calculate boundaries of the gap that the requested block fits into
2276 : * and cache this gap
2277 : */
2278 : static void
2279 3493805 : ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,
2280 : ext4_lblk_t hole_len)
2281 : {
2282 3493805 : struct extent_status es;
2283 :
2284 3493805 : ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
2285 3493805 : hole_start + hole_len - 1, &es);
2286 3503288 : if (es.es_len) {
2287 : /* Is there a delayed extent containing lblock? */
2288 1018920 : if (es.es_lblk <= hole_start)
2289 326227 : return;
2290 692693 : hole_len = min(es.es_lblk - hole_start, hole_len);
2291 : }
2292 3177061 : ext_debug(inode, " -> %u:%u\n", hole_start, hole_len);
2293 3177061 : ext4_es_insert_extent(inode, hole_start, hole_len, ~0,
2294 : EXTENT_STATUS_HOLE);
2295 : }
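/*
 * Worked example (hypothetical numbers, for illustration): for a hole
 * starting at block 200 with hole_len == 100, a delayed extent at
 * es_lblk == 250 trims the cached hole to 50 blocks (200..249).  If
 * the delayed extent instead started at or before block 200, nothing
 * would be cached, since the "hole" already has delayed data at its
 * start.
 */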
2296 :
2297 : /*
2298 : * ext4_ext_rm_idx:
2299 : * removes index from the index block.
2300 : */
2301 12896 : static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
2302 : struct ext4_ext_path *path, int depth)
2303 : {
2304 12896 : int err;
2305 12896 : ext4_fsblk_t leaf;
2306 :
2307 : /* free index block */
2308 12896 : depth--;
2309 12896 : path = path + depth;
2310 12896 : leaf = ext4_idx_pblock(path->p_idx);
2311 12896 : if (unlikely(path->p_hdr->eh_entries == 0)) {
2312 0 : EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
2313 0 : return -EFSCORRUPTED;
2314 : }
2315 12896 : err = ext4_ext_get_access(handle, inode, path);
2316 12896 : if (err)
2317 : return err;
2318 :
2319 12896 : if (path->p_idx != EXT_LAST_INDEX(path->p_hdr)) {
2320 0 : int len = EXT_LAST_INDEX(path->p_hdr) - path->p_idx;
2321 0 : len *= sizeof(struct ext4_extent_idx);
2322 0 : memmove(path->p_idx, path->p_idx + 1, len);
2323 : }
2324 :
2325 12896 : le16_add_cpu(&path->p_hdr->eh_entries, -1);
2326 12896 : err = ext4_ext_dirty(handle, inode, path);
2327 12896 : if (err)
2328 : return err;
2329 12896 : ext_debug(inode, "index is empty, remove it, free block %llu\n", leaf);
2330 12896 : trace_ext4_ext_rm_idx(inode, leaf);
2331 :
2332 12896 : ext4_free_blocks(handle, inode, NULL, leaf, 1,
2333 : EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
2334 :
2335 12931 : while (--depth >= 0) {
2336 2209 : if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
2337 : break;
2338 35 : path--;
2339 35 : err = ext4_ext_get_access(handle, inode, path);
2340 35 : if (err)
2341 : break;
2342 35 : path->p_idx->ei_block = (path+1)->p_idx->ei_block;
2343 35 : err = ext4_ext_dirty(handle, inode, path);
2344 35 : if (err)
2345 : break;
2346 : }
2347 : return err;
2348 : }
2349 :
2350 : /*
2351 : * ext4_ext_calc_credits_for_single_extent:
2352 : * This routine returns the max. credits needed to insert an extent
2353 : * into the extent tree.
2354 : * When passing the actual path, the caller should calculate the credits
2355 : * under i_data_sem.
2356 : */
2357 0 : int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
2358 : struct ext4_ext_path *path)
2359 : {
2360 0 : if (path) {
2361 0 : int depth = ext_depth(inode);
2362 0 : int ret = 0;
2363 :
2364 : /* probably there is space in leaf? */
2365 0 : if (le16_to_cpu(path[depth].p_hdr->eh_entries)
2366 0 : < le16_to_cpu(path[depth].p_hdr->eh_max)) {
2367 :
2368 : /*
2369 : * There is some space in the leaf, so no
2370 : * need to account for a leaf block credit.
2371 : *
2372 : * Bitmaps and block group descriptor blocks
2373 : * and other metadata blocks still need to be
2374 : * accounted for.
2375 : */
2376 : /* 1 bitmap, 1 block group descriptor */
2377 0 : ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
2378 0 : return ret;
2379 : }
2380 : }
2381 :
2382 0 : return ext4_chunk_trans_blocks(inode, nrblocks);
2383 : }
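/*
 * Worked example (for illustration; the constant depends on the
 * filesystem configuration): if the leaf still has room, the credits
 * are 2 (one bitmap + one group descriptor) plus
 * EXT4_META_TRANS_BLOCKS(sb) for the remaining metadata.  Without a
 * path, or with a full leaf, the estimate falls through to
 * ext4_chunk_trans_blocks(), which also budgets for tree splits.
 */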
2384 :
2385 : /*
2386 : * How many index/leaf blocks need to change/allocate to add @extents extents?
2387 : *
2388 : * If we add a single extent, then in the worse case, each tree level
2389 : * index/leaf need to be changed in case of the tree split.
2390 : *
2391 : * If more extents are inserted, they could cause the whole tree split more
2392 : * than once, but this is really rare.
2393 : */
2394 8048744 : int ext4_ext_index_trans_blocks(struct inode *inode, int extents)
2395 : {
2396 8246148 : int index;
2397 8246148 : int depth;
2398 :
2399 : /* If we are converting the inline data, only one is needed here. */
2400 8048744 : if (ext4_has_inline_data(inode))
2401 : return 1;
2402 :
2403 8246148 : depth = ext_depth(inode);
2404 :
2405 8048744 : if (extents <= 1)
2406 8048736 : index = depth * 2;
2407 : else
2408 197412 : index = depth * 3;
2409 :
2410 : return index;
2411 : }
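/*
 * Worked example (for illustration): with a depth-2 tree, inserting a
 * single extent budgets 2 * 2 == 4 index/leaf blocks (each level may
 * be modified once on a split); inserting several extents budgets
 * 3 * 2 == 6, allowing for the tree splitting more than once.  An
 * inline-data inode short-circuits to 1.
 */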
2412 :
2413 2324204 : static inline int get_default_free_blocks_flags(struct inode *inode)
2414 : {
2415 2324204 : if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
2416 : ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE))
2417 : return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
2418 2131236 : else if (ext4_should_journal_data(inode))
2419 0 : return EXT4_FREE_BLOCKS_FORGET;
2420 : return 0;
2421 : }
2422 :
2423 : /*
2424 : * ext4_rereserve_cluster - increment the reserved cluster count when
2425 : * freeing a cluster with a pending reservation
2426 : *
2427 : * @inode - file containing the cluster
2428 : * @lblk - logical block in cluster to be reserved
2429 : *
2430 : * Increments the reserved cluster count and adjusts quota in a bigalloc
2431 : * file system when freeing a partial cluster containing at least one
2432 : * delayed and unwritten block. A partial cluster meeting that
2433 : * requirement will have a pending reservation. If so, the
2434 : * RERESERVE_CLUSTER flag is used when calling ext4_free_blocks() to
2435 : * defer reserved and allocated space accounting to a subsequent call
2436 : * to this function.
2437 : */
2438 0 : static void ext4_rereserve_cluster(struct inode *inode, ext4_lblk_t lblk)
2439 : {
2440 0 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2441 0 : struct ext4_inode_info *ei = EXT4_I(inode);
2442 :
2443 0 : dquot_reclaim_block(inode, EXT4_C2B(sbi, 1));
2444 :
2445 0 : spin_lock(&ei->i_block_reservation_lock);
2446 0 : ei->i_reserved_data_blocks++;
2447 0 : percpu_counter_add(&sbi->s_dirtyclusters_counter, 1);
2448 0 : spin_unlock(&ei->i_block_reservation_lock);
2449 :
2450 0 : percpu_counter_add(&sbi->s_freeclusters_counter, 1);
2451 0 : ext4_remove_pending(inode, lblk);
2452 0 : }
2453 :
2454 2324192 : static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2455 : struct ext4_extent *ex,
2456 : struct partial_cluster *partial,
2457 : ext4_lblk_t from, ext4_lblk_t to)
2458 : {
2459 2324192 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2460 2324192 : unsigned short ee_len = ext4_ext_get_actual_len(ex);
2461 2324192 : ext4_fsblk_t last_pblk, pblk;
2462 2324192 : ext4_lblk_t num;
2463 2324192 : int flags;
2464 :
2465 : /* only extent tail removal is allowed */
2466 2324192 : if (from < le32_to_cpu(ex->ee_block) ||
2467 2324192 : to != le32_to_cpu(ex->ee_block) + ee_len - 1) {
2468 0 : ext4_error(sbi->s_sb,
2469 : "strange request: removal(2) %u-%u from %u:%u",
2470 : from, to, le32_to_cpu(ex->ee_block), ee_len);
2471 0 : return 0;
2472 : }
2473 :
2474 : #ifdef EXTENTS_STATS
2475 : spin_lock(&sbi->s_ext_stats_lock);
2476 : sbi->s_ext_blocks += ee_len;
2477 : sbi->s_ext_extents++;
2478 : if (ee_len < sbi->s_ext_min)
2479 : sbi->s_ext_min = ee_len;
2480 : if (ee_len > sbi->s_ext_max)
2481 : sbi->s_ext_max = ee_len;
2482 : if (ext_depth(inode) > sbi->s_depth_max)
2483 : sbi->s_depth_max = ext_depth(inode);
2484 : spin_unlock(&sbi->s_ext_stats_lock);
2485 : #endif
2486 :
2487 2324204 : trace_ext4_remove_blocks(inode, ex, from, to, partial);
2488 :
2489 : /*
2490 : * if we have a partial cluster, and it's different from the
2491 : * cluster of the last block in the extent, we free it
2492 : */
2493 2324207 : last_pblk = ext4_ext_pblock(ex) + ee_len - 1;
2494 :
2495 2324207 : if (partial->state != initial &&
2496 0 : partial->pclu != EXT4_B2C(sbi, last_pblk)) {
2497 0 : if (partial->state == tofree) {
2498 0 : flags = get_default_free_blocks_flags(inode);
2499 0 : if (ext4_is_pending(inode, partial->lblk))
2500 0 : flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
2501 0 : ext4_free_blocks(handle, inode, NULL,
2502 0 : EXT4_C2B(sbi, partial->pclu),
2503 0 : sbi->s_cluster_ratio, flags);
2504 0 : if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
2505 0 : ext4_rereserve_cluster(inode, partial->lblk);
2506 : }
2507 0 : partial->state = initial;
2508 : }
2509 :
2510 2324207 : num = le32_to_cpu(ex->ee_block) + ee_len - from;
2511 2324207 : pblk = ext4_ext_pblock(ex) + ee_len - num;
2512 :
2513 : /*
2514 : * We free the partial cluster at the end of the extent (if any),
2515 : * unless the cluster is used by another extent (partial_cluster
2516 : * state is nofree). If a partial cluster exists here, it must be
2517 : * shared with the last block in the extent.
2518 : */
2519 2324207 : flags = get_default_free_blocks_flags(inode);
2520 :
2521 : /* partial, left end cluster aligned, right end unaligned */
2522 2324197 : if ((EXT4_LBLK_COFF(sbi, to) != sbi->s_cluster_ratio - 1) &&
2523 0 : (EXT4_LBLK_CMASK(sbi, to) >= from) &&
2524 0 : (partial->state != nofree)) {
2525 0 : if (ext4_is_pending(inode, to))
2526 0 : flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
2527 0 : ext4_free_blocks(handle, inode, NULL,
2528 0 : EXT4_PBLK_CMASK(sbi, last_pblk),
2529 0 : sbi->s_cluster_ratio, flags);
2530 0 : if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
2531 0 : ext4_rereserve_cluster(inode, to);
2532 0 : partial->state = initial;
2533 0 : flags = get_default_free_blocks_flags(inode);
2534 : }
2535 :
2536 2324197 : flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
2537 :
2538 : /*
2539 : * For bigalloc file systems, we never free a partial cluster
2540 : * at the beginning of the extent. Instead, we check to see if we
2541 : * need to free it on a subsequent call to ext4_remove_blocks,
2542 : * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space.
2543 : */
2544 2324197 : flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
2545 2324197 : ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
2546 :
2547 : /* reset the partial cluster if we've freed past it */
2548 2324264 : if (partial->state != initial && partial->pclu != EXT4_B2C(sbi, pblk))
2549 0 : partial->state = initial;
2550 :
2551 : /*
2552 : * If we've freed the entire extent but the beginning is not left
2553 : * cluster aligned and is not marked as ineligible for freeing we
2554 : * record the partial cluster at the beginning of the extent. It
2555 : * wasn't freed by the preceding ext4_free_blocks() call, and we
2556 : * need to look farther to the left to determine if it's to be freed
2557 : * (not shared with another extent). Else, reset the partial
2558 : * cluster - we're either done freeing or the beginning of the
2559 : * extent is left cluster aligned.
2560 : */
2561 2324264 : if (EXT4_LBLK_COFF(sbi, from) && num == ee_len) {
2562 0 : if (partial->state == initial) {
2563 0 : partial->pclu = EXT4_B2C(sbi, pblk);
2564 0 : partial->lblk = from;
2565 0 : partial->state = tofree;
2566 : }
2567 : } else {
2568 2324264 : partial->state = initial;
2569 : }
2570 :
2571 : return 0;
2572 : }
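/*
 * Worked example (hypothetical numbers, for illustration): an extent
 * maps logical blocks 100..149 to physical blocks 5000..5049.  A tail
 * removal with from == 120, to == 149 gives num == 100 + 50 - 120 == 30
 * and pblk == 5000 + 50 - 30 == 5020, so physical blocks 5020..5049
 * are freed and the caller shrinks ee_len to 20.  The partial-cluster
 * bookkeeping only matters on bigalloc filesystems (cluster ratio > 1).
 */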
2573 :
2574 : /*
2575 : * ext4_ext_rm_leaf() removes the extents associated with the
2576 : * blocks appearing between "start" and "end". Both "start"
2577 : * and "end" must appear in the same extent or EIO is returned.
2578 : *
2579 : * @handle: The journal handle
2580 : * @inode: The file's inode
2581 : * @path: The path to the leaf
2582 : * @partial: The cluster which we'll have to free if all extents
2583 : * have been released from it. However, if this value is
2584 : * negative, it's a cluster just to the right of the
2585 : * punched region and it must not be freed.
2586 : * @start: The first block to remove
2587 : * @end: The last block to remove
2588 : */
2589 : static int
2590 1173008 : ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2591 : struct ext4_ext_path *path,
2592 : struct partial_cluster *partial,
2593 : ext4_lblk_t start, ext4_lblk_t end)
2594 : {
2595 1173008 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2596 1173008 : int err = 0, correct_index = 0;
2597 1173008 : int depth = ext_depth(inode), credits, revoke_credits;
2598 1173008 : struct ext4_extent_header *eh;
2599 1173008 : ext4_lblk_t a, b;
2600 1173008 : unsigned num;
2601 1173008 : ext4_lblk_t ex_ee_block;
2602 1173008 : unsigned short ex_ee_len;
2603 1173008 : unsigned unwritten = 0;
2604 1173008 : struct ext4_extent *ex;
2605 1173008 : ext4_fsblk_t pblk;
2606 :
2607 : /* the header must be checked already in ext4_ext_remove_space() */
2608 1173008 : ext_debug(inode, "truncate since %u in leaf to %u\n", start, end);
2609 1173008 : if (!path[depth].p_hdr)
2610 203606 : path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
2611 1173008 : eh = path[depth].p_hdr;
2612 1173008 : if (unlikely(path[depth].p_hdr == NULL)) {
2613 0 : EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
2614 0 : return -EFSCORRUPTED;
2615 : }
2616 : /* find where to start removing */
2617 1173008 : ex = path[depth].p_ext;
2618 1173008 : if (!ex)
2619 762705 : ex = EXT_LAST_EXTENT(eh);
2620 :
2621 1173008 : ex_ee_block = le32_to_cpu(ex->ee_block);
2622 1173008 : ex_ee_len = ext4_ext_get_actual_len(ex);
2623 :
2624 1173008 : trace_ext4_ext_rm_leaf(inode, start, ex, partial);
2625 :
2626 3668049 : while (ex >= EXT_FIRST_EXTENT(eh) &&
2627 3095215 : ex_ee_block + ex_ee_len > start) {
2628 :
2629 2495051 : if (ext4_ext_is_unwritten(ex))
2630 : unwritten = 1;
2631 : else
2632 1289164 : unwritten = 0;
2633 :
2634 2495051 : ext_debug(inode, "remove ext %u:[%d]%d\n", ex_ee_block,
2635 : unwritten, ex_ee_len);
2636 2495051 : path[depth].p_ext = ex;
2637 :
2638 2495051 : a = max(ex_ee_block, start);
2639 2495051 : b = min(ex_ee_block + ex_ee_len - 1, end);
2640 :
2641 2495051 : ext_debug(inode, " border %u:%u\n", a, b);
2642 :
2643 : /* If this extent is beyond the end of the hole, skip it */
2644 2495051 : if (end < ex_ee_block) {
2645 : /*
2646 : * We're going to skip this extent and move to another,
2647 : * so note that its first cluster is in use to avoid
2648 : * freeing it when removing blocks. Eventually, the
2649 : * right edge of the truncated/punched region will
2650 : * be just to the left.
2651 : */
2652 170761 : if (sbi->s_cluster_ratio > 1) {
2653 0 : pblk = ext4_ext_pblock(ex);
2654 0 : partial->pclu = EXT4_B2C(sbi, pblk);
2655 0 : partial->state = nofree;
2656 : }
2657 170761 : ex--;
2658 170761 : ex_ee_block = le32_to_cpu(ex->ee_block);
2659 170761 : ex_ee_len = ext4_ext_get_actual_len(ex);
2660 170761 : continue;
2661 2324290 : } else if (b != ex_ee_block + ex_ee_len - 1) {
2662 0 : EXT4_ERROR_INODE(inode,
2663 : "can not handle truncate %u:%u "
2664 : "on extent %u:%u",
2665 : start, end, ex_ee_block,
2666 : ex_ee_block + ex_ee_len - 1);
2667 0 : err = -EFSCORRUPTED;
2668 0 : goto out;
2669 2324290 : } else if (a != ex_ee_block) {
2670 : /* remove tail of the extent */
2671 215320 : num = a - ex_ee_block;
2672 : } else {
2673 : /* remove whole extent: excellent! */
2674 : num = 0;
2675 : }
2676 : /*
2677 : * 3 for leaf, sb, and inode plus 2 (bmap and group
2678 : * descriptor) for each block group; assume two block
2679 : * groups plus ex_ee_len/blocks_per_block_group for
2680 : * the worst case
2681 : */
2682 2324290 : credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
2683 2324290 : if (ex == EXT_FIRST_EXTENT(eh)) {
2684 417632 : correct_index = 1;
2685 417632 : credits += (ext_depth(inode)) + 1;
2686 : }
2687 2324290 : credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
2688 : /*
2689 : * We may end up freeing some index blocks and data from the
2690 : * punched range. Note that partial clusters are accounted for
2691 : * by ext4_free_data_revoke_credits().
2692 : */
2693 2324290 : revoke_credits =
2694 : ext4_free_metadata_revoke_credits(inode->i_sb,
2695 : ext_depth(inode)) +
2696 2324290 : ext4_free_data_revoke_credits(inode, b - a + 1);
2697 :
2698 2324291 : err = ext4_datasem_ensure_credits(handle, inode, credits,
2699 : credits, revoke_credits);
2700 2324275 : if (err) {
2701 68 : if (err > 0)
2702 68 : err = -EAGAIN;
2703 68 : goto out;
2704 : }
2705 :
2706 2324207 : err = ext4_ext_get_access(handle, inode, path + depth);
2707 2324195 : if (err)
2708 0 : goto out;
2709 :
2710 2324195 : err = ext4_remove_blocks(handle, inode, ex, partial, a, b);
2711 2324259 : if (err)
2712 0 : goto out;
2713 :
2714 2324259 : if (num == 0)
2715 : /* this extent is removed; mark slot entirely unused */
2716 2108943 : ext4_ext_store_pblock(ex, 0);
2717 :
2718 2324259 : ex->ee_len = cpu_to_le16(num);
2719 : /*
2720 : * Do not mark unwritten if all the blocks in the
2721 : * extent have been removed.
2722 : */
2723 2324259 : if (unwritten && num)
2724 78549 : ext4_ext_mark_unwritten(ex);
2725 : /*
2726 : * If the extent was completely released,
2727 : * we need to remove it from the leaf
2728 : */
2729 2324259 : if (num == 0) {
2730 2108948 : if (end != EXT_MAX_BLOCKS - 1) {
2731 : /*
2732 : * For hole punching, we need to scoot all the
2733 : * extents up when an extent is removed so that
2734 : * we don't have blank extents in the middle
2735 : */
2736 776910 : memmove(ex, ex+1, (EXT_LAST_EXTENT(eh) - ex) *
2737 : sizeof(struct ext4_extent));
2738 :
2739 : /* Now get rid of the one at the end */
2740 776910 : memset(EXT_LAST_EXTENT(eh), 0,
2741 : sizeof(struct ext4_extent));
2742 : }
2743 2108948 : le16_add_cpu(&eh->eh_entries, -1);
2744 : }
2745 :
2746 2324259 : err = ext4_ext_dirty(handle, inode, path + depth);
2747 2324280 : if (err)
2748 0 : goto out;
2749 :
2750 2324280 : ext_debug(inode, "new extent: %u:%u:%llu\n", ex_ee_block, num,
2751 : ext4_ext_pblock(ex));
2752 2324280 : ex--;
2753 2324280 : ex_ee_block = le32_to_cpu(ex->ee_block);
2754 4648560 : ex_ee_len = ext4_ext_get_actual_len(ex);
2755 : }
2756 :
2757 1172960 : if (correct_index && eh->eh_entries)
2758 21975 : err = ext4_ext_correct_indexes(handle, inode, path);
2759 :
2760 : /*
2761 : * If there's a partial cluster and at least one extent remains in
2762 : * the leaf, free the partial cluster if it isn't shared with the
2763 : * current extent. If it is shared with the current extent
2764 : * we reset the partial cluster because we've reached the start of the
2765 : * truncated/punched region and we're done removing blocks.
2766 : */
2767 1172960 : if (partial->state == tofree && ex >= EXT_FIRST_EXTENT(eh)) {
2768 0 : pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
2769 0 : if (partial->pclu != EXT4_B2C(sbi, pblk)) {
2770 0 : int flags = get_default_free_blocks_flags(inode);
2771 :
2772 0 : if (ext4_is_pending(inode, partial->lblk))
2773 0 : flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
2774 0 : ext4_free_blocks(handle, inode, NULL,
2775 0 : EXT4_C2B(sbi, partial->pclu),
2776 0 : sbi->s_cluster_ratio, flags);
2777 0 : if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
2778 0 : ext4_rereserve_cluster(inode, partial->lblk);
2779 : }
2780 0 : partial->state = initial;
2781 : }
2782 :
2783 : /* if this leaf is free, then we should
2784 : * remove it from index block above */
2785 1172960 : if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
2786 12861 : err = ext4_ext_rm_idx(handle, inode, path, depth);
2787 :
2788 1160099 : out:
2789 : return err;
2790 : }
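/*
 * Worked example (for illustration) of the credits estimate above:
 * with 32768 blocks per group, removing a 100000-block extent from a
 * depth-1 tree budgets 7 + 2 * (100000 / 32768) == 13 blocks, plus
 * depth + 1 == 2 more when the first extent in the leaf is touched
 * and the indexes above may need correcting, plus the quota blocks
 * from EXT4_MAXQUOTAS_TRANS_BLOCKS().
 */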
2791 :
2792 : /*
2793 : * ext4_ext_more_to_rm:
2794 : * returns 1 if the current index has to be freed (even partially)
2795 : */
2796 : static int
2797 762220 : ext4_ext_more_to_rm(struct ext4_ext_path *path)
2798 : {
2799 762220 : BUG_ON(path->p_idx == NULL);
2800 :
2801 762220 : if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
2802 : return 0;
2803 :
2804 : /*
2805 : * if a truncate on a deeper level happened, it wasn't partial,
2806 : * so we have to consider the current index for truncation
2807 : */
2808 235789 : if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block)
2809 31825 : return 0;
2810 : return 1;
2811 : }
2812 :
2813 1147183 : int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
2814 : ext4_lblk_t end)
2815 : {
2816 1147183 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2817 1147183 : int depth = ext_depth(inode);
2818 1147183 : struct ext4_ext_path *path = NULL;
2819 1147183 : struct partial_cluster partial;
2820 1147183 : handle_t *handle;
2821 1147183 : int i = 0, err = 0;
2822 :
2823 1147183 : partial.pclu = 0;
2824 1147183 : partial.lblk = 0;
2825 1147183 : partial.state = initial;
2826 :
2827 1147183 : ext_debug(inode, "truncate since %u to %u\n", start, end);
2828 :
2829 : /* probably the first extent we're going to free will be the last in the block */
2830 1147183 : handle = ext4_journal_start_with_revoke(inode, EXT4_HT_TRUNCATE,
2831 : depth + 1,
2832 : ext4_free_metadata_revoke_credits(inode->i_sb, depth));
2833 1147122 : if (IS_ERR(handle))
2834 0 : return PTR_ERR(handle);
2835 :
2836 1147122 : again:
2837 1147190 : trace_ext4_ext_remove_space(inode, start, end, depth);
2838 :
2839 : /*
2840 : * Check if we are removing extents inside the extent tree. If that
2841 : * is the case, we are going to punch a hole inside the extent tree
2842 : * so we have to check whether we need to split the extent covering
2843 : * the last block to remove so we can easily remove the part of it
2844 : * in ext4_ext_rm_leaf().
2845 : */
2846 1147084 : if (end < EXT_MAX_BLOCKS - 1) {
2847 420670 : struct ext4_extent *ex;
2848 420670 : ext4_lblk_t ee_block, ex_end, lblk;
2849 420670 : ext4_fsblk_t pblk;
2850 :
2851 : /* find extent for or closest extent to this block */
2852 420670 : path = ext4_find_extent(inode, end, NULL,
2853 : EXT4_EX_NOCACHE | EXT4_EX_NOFAIL);
2854 420670 : if (IS_ERR(path)) {
2855 0 : ext4_journal_stop(handle);
2856 0 : return PTR_ERR(path);
2857 : }
2858 420670 : depth = ext_depth(inode);
2859 : /* The leaf may not exist only if the inode has no blocks at all */
2860 420670 : ex = path[depth].p_ext;
2861 420670 : if (!ex) {
2862 10253 : if (depth) {
2863 0 : EXT4_ERROR_INODE(inode,
2864 : "path[%d].p_hdr == NULL",
2865 : depth);
2866 0 : err = -EFSCORRUPTED;
2867 : }
2868 10253 : goto out;
2869 : }
2870 :
2871 410417 : ee_block = le32_to_cpu(ex->ee_block);
2872 410417 : ex_end = ee_block + ext4_ext_get_actual_len(ex) - 1;
2873 :
2874 : /*
2875 : * See if the last block is inside the extent; if so, split
2876 : * the extent at the 'end' block so we can easily remove the
2877 : * tail of the first part of the split extent in
2878 : * ext4_ext_rm_leaf().
2879 : */
2880 410417 : if (end >= ee_block && end < ex_end) {
2881 :
2882 : /*
2883 : * If we're going to split the extent, note that
2884 : * the cluster containing the block after 'end' is
2885 : * in use to avoid freeing it when removing blocks.
2886 : */
2887 164397 : if (sbi->s_cluster_ratio > 1) {
2888 0 : pblk = ext4_ext_pblock(ex) + end - ee_block + 1;
2889 0 : partial.pclu = EXT4_B2C(sbi, pblk);
2890 0 : partial.state = nofree;
2891 : }
2892 :
2893 : /*
2894 : * Split the extent in two so that 'end' is the last
2895 : * block in the first new extent. Also we should not
2896 : * fail removing space due to ENOSPC, so try to use a
2897 : * reserved block if that happens.
2898 : */
2899 164397 : err = ext4_force_split_extent_at(handle, inode, &path,
2900 : end + 1, 1);
2901 164397 : if (err < 0)
2902 0 : goto out;
2903 :
2904 246020 : } else if (sbi->s_cluster_ratio > 1 && end >= ex_end &&
2905 0 : partial.state == initial) {
2906 : /*
2907 : * If we're punching, there's an extent to the right.
2908 : * If the partial cluster hasn't been set, set it to
2909 : * that extent's first cluster and its state to nofree
2910 : * so it won't be freed should it contain blocks to be
2911 : * removed. If it's already set (tofree/nofree), we're
2912 : * retrying and keep the original partial cluster info
2913 : * so a cluster marked tofree as a result of earlier
2914 : * extent removal is not lost.
2915 : */
2916 0 : lblk = ex_end + 1;
2917 0 : err = ext4_ext_search_right(inode, path, &lblk, &pblk,
2918 : NULL);
2919 0 : if (err < 0)
2920 0 : goto out;
2921 0 : if (pblk) {
2922 0 : partial.pclu = EXT4_B2C(sbi, pblk);
2923 0 : partial.state = nofree;
2924 : }
2925 : }
2926 : }
2927 : /*
2928 : * We start scanning from the right side, freeing all the blocks
2929 : * after i_size and walking into the tree depth-wise.
2930 : */
2931 1136831 : depth = ext_depth(inode);
2932 1136831 : if (path) {
2933 : int k = i = depth;
2934 441600 : while (--k > 0)
2935 31274 : path[k].p_block =
2936 31274 : le16_to_cpu(path[k].p_hdr->eh_entries)+1;
2937 : } else {
2938 726505 : path = kcalloc(depth + 1, sizeof(struct ext4_ext_path),
2939 : GFP_NOFS | __GFP_NOFAIL);
2940 726432 : if (path == NULL) {
2941 0 : ext4_journal_stop(handle);
2942 0 : return -ENOMEM;
2943 : }
2944 726432 : path[0].p_maxdepth = path[0].p_depth = depth;
2945 726432 : path[0].p_hdr = ext_inode_hdr(inode);
2946 726432 : i = 0;
2947 :
2948 726432 : if (ext4_ext_check(inode, path[0].p_hdr, depth, 0)) {
2949 0 : err = -EFSCORRUPTED;
2950 0 : goto out;
2951 : }
2952 : }
2953 : err = 0;
2954 :
2955 3072089 : while (i >= 0 && err == 0) {
2956 1935157 : if (i == depth) {
2957 : /* this is leaf block */
2958 1173015 : err = ext4_ext_rm_leaf(handle, inode, path,
2959 : &partial, start, end);
2960 : /* root level has p_bh == NULL, brelse() eats this */
2961 1172949 : brelse(path[i].p_bh);
2962 1172998 : path[i].p_bh = NULL;
2963 1172998 : i--;
2964 1172998 : continue;
2965 : }
2966 :
2967 : /* this is index block */
2968 762142 : if (!path[i].p_hdr) {
2969 374 : ext_debug(inode, "initialize header\n");
2970 374 : path[i].p_hdr = ext_block_hdr(path[i].p_bh);
2971 : }
2972 :
2973 762142 : if (!path[i].p_idx) {
2974 : /* this level hasn't been touched yet */
2975 167814 : path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
2976 167814 : path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1;
2977 167814 : ext_debug(inode, "init index ptr: hdr 0x%p, num %d\n",
2978 : path[i].p_hdr,
2979 : le16_to_cpu(path[i].p_hdr->eh_entries));
2980 : } else {
2981 : /* we were already here, see at next index */
2982 594328 : path[i].p_idx--;
2983 : }
2984 :
2985 762142 : ext_debug(inode, "level %d - index, first 0x%p, cur 0x%p\n",
2986 : i, EXT_FIRST_INDEX(path[i].p_hdr),
2987 : path[i].p_idx);
2988 762142 : if (ext4_ext_more_to_rm(path + i)) {
2989 203965 : struct buffer_head *bh;
2990 : /* go to the next level */
2991 203965 : ext_debug(inode, "move to level %d (block %llu)\n",
2992 : i + 1, ext4_idx_pblock(path[i].p_idx));
2993 203965 : memset(path + i + 1, 0, sizeof(*path));
2994 203965 : bh = read_extent_tree_block(inode, path[i].p_idx,
2995 : depth - i - 1,
2996 : EXT4_EX_NOCACHE);
2997 203982 : if (IS_ERR(bh)) {
2998 : /* should we reset i_size? */
2999 1 : err = PTR_ERR(bh);
3000 1 : break;
3001 : }
3002 : /* Yield here to deal with large extent trees.
3003 : * Should be a no-op if we did IO above. */
3004 203981 : cond_resched();
3005 203979 : if (WARN_ON(i + 1 > depth)) {
3006 : err = -EFSCORRUPTED;
3007 : break;
3008 : }
3009 203979 : path[i + 1].p_bh = bh;
3010 :
3011 : /* save actual number of indexes since this
3012 : * number is changed at the next iteration */
3013 203979 : path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
3014 203979 : i++;
3015 : } else {
3016 : /* we finished processing this index, go up */
3017 558256 : if (path[i].p_hdr->eh_entries == 0 && i > 0) {
3018 : /* index is empty, remove it;
3019 : * handle must already be prepared by the
3020 : * preceding leaf removal */
3021 35 : err = ext4_ext_rm_idx(handle, inode, path, i);
3022 : }
3023 : /* root level has p_bh == NULL, brelse() eats this */
3024 558256 : brelse(path[i].p_bh);
3025 558256 : path[i].p_bh = NULL;
3026 558256 : i--;
3027 558256 : ext_debug(inode, "return to level %d\n", i);
3028 : }
3029 : }
3030 :
3031 1136933 : trace_ext4_ext_remove_space_done(inode, start, end, depth, &partial,
3032 1136933 : path->p_hdr->eh_entries);
3033 :
3034 : /*
3035 : * if there's a partial cluster and we have removed the first extent
3036 : * in the file, then we also free the partial cluster
3037 : */
3038 1136772 : if (partial.state == tofree && err == 0) {
3039 0 : int flags = get_default_free_blocks_flags(inode);
3040 :
3041 0 : if (ext4_is_pending(inode, partial.lblk))
3042 0 : flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
3043 0 : ext4_free_blocks(handle, inode, NULL,
3044 0 : EXT4_C2B(sbi, partial.pclu),
3045 0 : sbi->s_cluster_ratio, flags);
3046 0 : if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
3047 0 : ext4_rereserve_cluster(inode, partial.lblk);
3048 0 : partial.state = initial;
3049 : }
3050 :
3051 : /* TODO: flexible tree reduction should be here */
3052 1136772 : if (path->p_hdr->eh_entries == 0) {
3053 : /*
3054 : * a truncate to zero freed the whole tree,
3055 : * so we need to correct eh_depth
3056 : */
3057 542132 : err = ext4_ext_get_access(handle, inode, path);
3058 542135 : if (err == 0) {
3059 542135 : ext_inode_hdr(inode)->eh_depth = 0;
3060 542135 : ext_inode_hdr(inode)->eh_max =
3061 : cpu_to_le16(ext4_ext_space_root(inode, 0));
3062 542135 : err = ext4_ext_dirty(handle, inode, path);
3063 : }
3064 : }
3065 594640 : out:
3066 1147376 : ext4_free_ext_path(path);
3067 1147264 : path = NULL;
3068 1147264 : if (err == -EAGAIN)
3069 68 : goto again;
3070 1147196 : ext4_journal_stop(handle);
3071 :
3072 1147196 : return err;
3073 : }
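/*
 * Worked example (for illustration) of the iterative walk above: for a
 * depth-1 tree, i starts at 0 on the root index.  Each index entry
 * visited pushes i to 1, where ext4_ext_rm_leaf() empties (part of)
 * the leaf and pops back to 0; when ext4_ext_more_to_rm() finds no
 * index entry left to visit, i drops below 0 and the loop ends.  A
 * truncate to zero then resets eh_depth in the root to 0.
 */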
3074 :
3075 : /*
3076 : * called at mount time
3077 : */
3078 2517 : void ext4_ext_init(struct super_block *sb)
3079 : {
3080 : /*
3081 : * possible initialization would be here
3082 : */
3083 :
3084 2517 : if (ext4_has_feature_extents(sb)) {
3085 : #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
3086 : printk(KERN_INFO "EXT4-fs: file extents enabled"
3087 : #ifdef AGGRESSIVE_TEST
3088 : ", aggressive tests"
3089 : #endif
3090 : #ifdef CHECK_BINSEARCH
3091 : ", check binsearch"
3092 : #endif
3093 : #ifdef EXTENTS_STATS
3094 : ", stats"
3095 : #endif
3096 : "\n");
3097 : #endif
3098 : #ifdef EXTENTS_STATS
3099 : spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock);
3100 : EXT4_SB(sb)->s_ext_min = 1 << 30;
3101 : EXT4_SB(sb)->s_ext_max = 0;
3102 : #endif
3103 2517 : }
3104 2517 : }
3105 :
3106 : /*
3107 : * called at umount time
3108 : */
3109 2518 : void ext4_ext_release(struct super_block *sb)
3110 : {
3111 2518 : if (!ext4_has_feature_extents(sb))
3112 : return;
3113 :
3114 : #ifdef EXTENTS_STATS
3115 : if (EXT4_SB(sb)->s_ext_blocks && EXT4_SB(sb)->s_ext_extents) {
3116 : struct ext4_sb_info *sbi = EXT4_SB(sb);
3117 : printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n",
3118 : sbi->s_ext_blocks, sbi->s_ext_extents,
3119 : sbi->s_ext_blocks / sbi->s_ext_extents);
3120 : printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n",
3121 : sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max);
3122 : }
3123 : #endif
3124 : }
3125 :
3126 10049 : static void ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
3127 : {
3128 10049 : ext4_lblk_t ee_block;
3129 10049 : ext4_fsblk_t ee_pblock;
3130 10049 : unsigned int ee_len;
3131 :
3132 10049 : ee_block = le32_to_cpu(ex->ee_block);
3133 10049 : ee_len = ext4_ext_get_actual_len(ex);
3134 10049 : ee_pblock = ext4_ext_pblock(ex);
3135 :
3136 10049 : if (ee_len == 0)
3137 : return;
3138 :
3139 1 : ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
3140 : EXTENT_STATUS_WRITTEN);
3141 : }
3142 :
3143 : /* FIXME!! we need to try to merge to left or right after zero-out */
3144 1 : static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
3145 : {
3146 1 : ext4_fsblk_t ee_pblock;
3147 1 : unsigned int ee_len;
3148 :
3149 1 : ee_len = ext4_ext_get_actual_len(ex);
3150 1 : ee_pblock = ext4_ext_pblock(ex);
3151 1 : return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock,
3152 : ee_len);
3153 : }
3154 :
3155 : /*
3156 : * ext4_split_extent_at() splits an extent at given block.
3157 : *
3158 : * @handle: the journal handle
3159 : * @inode: the file inode
3160 : * @path: the path to the extent
3161 : * @split: the logical block where the extent is split.
3162 : * @split_flag: indicates if the extent could be zeroed out if the split fails,
3163 : * and the states (initialized or unwritten) of the new extents.
3164 : * @flags: flags used to insert new extent to extent tree.
3165 : *
3166 : *
3167 : * Splits extent [a, b] into two extents [a, @split) and [@split, b], the states
3168 : * of which are determined by @split_flag.
3169 : *
3170 : * There are two cases:
3171 : * a> the extent is split into two extents.
3172 : * b> no split is needed, and the extent is just marked.
3173 : *
3174 : * Returns 0 on success.
3175 : */
3176 2277710 : static int ext4_split_extent_at(handle_t *handle,
3177 : struct inode *inode,
3178 : struct ext4_ext_path **ppath,
3179 : ext4_lblk_t split,
3180 : int split_flag,
3181 : int flags)
3182 : {
3183 2277710 : struct ext4_ext_path *path = *ppath;
3184 2277710 : ext4_fsblk_t newblock;
3185 2277710 : ext4_lblk_t ee_block;
3186 2277710 : struct ext4_extent *ex, newex, orig_ex, zero_ex;
3187 2277710 : struct ext4_extent *ex2 = NULL;
3188 2277710 : unsigned int ee_len, depth;
3189 2277710 : int err = 0;
3190 :
3191 2277710 : BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
3192 : (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
3193 :
3194 2277710 : ext_debug(inode, "logical block %llu\n", (unsigned long long)split);
3195 :
3196 2277710 : ext4_ext_show_leaf(inode, path);
3197 :
3198 2277710 : depth = ext_depth(inode);
3199 2277710 : ex = path[depth].p_ext;
3200 2277710 : ee_block = le32_to_cpu(ex->ee_block);
3201 2277710 : ee_len = ext4_ext_get_actual_len(ex);
3202 2277710 : newblock = split - ee_block + ext4_ext_pblock(ex);
3203 :
3204 2277710 : BUG_ON(split < ee_block || split >= (ee_block + ee_len));
3205 2277710 : BUG_ON(!ext4_ext_is_unwritten(ex) &&
3206 : split_flag & (EXT4_EXT_MAY_ZEROOUT |
3207 : EXT4_EXT_MARK_UNWRIT1 |
3208 : EXT4_EXT_MARK_UNWRIT2));
3209 :
3210 2277710 : err = ext4_ext_get_access(handle, inode, path + depth);
3211 2277709 : if (err)
3212 0 : goto out;
3213 :
3214 2277709 : if (split == ee_block) {
3215 : /*
3216 : * case b: block @split is the block that the extent begins with,
3217 : * so we just change the state of the extent, and splitting
3218 : * is not needed.
3219 : */
3220 250024 : if (split_flag & EXT4_EXT_MARK_UNWRIT2)
3221 226019 : ext4_ext_mark_unwritten(ex);
3222 : else
3223 24005 : ext4_ext_mark_initialized(ex);
3224 :
3225 250024 : if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
3226 2277 : ext4_ext_try_to_merge(handle, inode, path, ex);
3227 :
3228 250024 : err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3229 250027 : goto out;
3230 : }
3231 :
3232 : /* case a */
3233 2027685 : memcpy(&orig_ex, ex, sizeof(orig_ex));
3234 2027685 : ex->ee_len = cpu_to_le16(split - ee_block);
3235 2027685 : if (split_flag & EXT4_EXT_MARK_UNWRIT1)
3236 1772430 : ext4_ext_mark_unwritten(ex);
3237 :
3238 : /*
3239 : * the path may lead to a new leaf, not to the original leaf any more,
3240 : * after ext4_ext_insert_extent() returns.
3241 : */
3242 2027685 : err = ext4_ext_dirty(handle, inode, path + depth);
3243 2027685 : if (err)
3244 0 : goto fix_extent_len;
3245 :
3246 2027685 : ex2 = &newex;
3247 2027685 : ex2->ee_block = cpu_to_le32(split);
3248 2027685 : ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block));
3249 2027685 : ext4_ext_store_pblock(ex2, newblock);
3250 2027685 : if (split_flag & EXT4_EXT_MARK_UNWRIT2)
3251 1770036 : ext4_ext_mark_unwritten(ex2);
3252 :
3253 2027685 : err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
3254 2027685 : if (err != -ENOSPC && err != -EDQUOT && err != -ENOMEM)
3255 2027684 : goto out;
3256 :
3257 1 : if (EXT4_EXT_MAY_ZEROOUT & split_flag) {
3258 1 : if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
3259 1 : if (split_flag & EXT4_EXT_DATA_VALID1) {
3260 0 : err = ext4_ext_zeroout(inode, ex2);
3261 0 : zero_ex.ee_block = ex2->ee_block;
3262 0 : zero_ex.ee_len = cpu_to_le16(
3263 : ext4_ext_get_actual_len(ex2));
3264 0 : ext4_ext_store_pblock(&zero_ex,
3265 : ext4_ext_pblock(ex2));
3266 : } else {
3267 1 : err = ext4_ext_zeroout(inode, ex);
3268 1 : zero_ex.ee_block = ex->ee_block;
3269 1 : zero_ex.ee_len = cpu_to_le16(
3270 : ext4_ext_get_actual_len(ex));
3271 1 : ext4_ext_store_pblock(&zero_ex,
3272 : ext4_ext_pblock(ex));
3273 : }
3274 : } else {
3275 0 : err = ext4_ext_zeroout(inode, &orig_ex);
3276 0 : zero_ex.ee_block = orig_ex.ee_block;
3277 0 : zero_ex.ee_len = cpu_to_le16(
3278 : ext4_ext_get_actual_len(&orig_ex));
3279 0 : ext4_ext_store_pblock(&zero_ex,
3280 : ext4_ext_pblock(&orig_ex));
3281 : }
3282 :
3283 1 : if (!err) {
3284 : /* update the extent length and mark as initialized */
3285 1 : ex->ee_len = cpu_to_le16(ee_len);
3286 1 : ext4_ext_try_to_merge(handle, inode, path, ex);
3287 1 : err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3288 1 : if (!err)
3289 : /* update extent status tree */
3290 1 : ext4_zeroout_es(inode, &zero_ex);
3291 : /* If we failed at this point, we don't know in which
3292 : * state the extent tree exactly is, so don't try to fix the
3293 : * length of the original extent, as that may do even more
3294 : * damage.
3295 : */
3296 1 : goto out;
3297 : }
3298 : }
3299 :
3300 0 : fix_extent_len:
3301 0 : ex->ee_len = orig_ex.ee_len;
3302 : /*
3303 : * Ignore ext4_ext_dirty return value since we are already in error path
3304 : * and err is a non-zero error code.
3305 : */
3306 0 : ext4_ext_dirty(handle, inode, path + path->p_depth);
3307 0 : return err;
3308 : out:
3309 : ext4_ext_show_leaf(inode, path);
3310 : return err;
3311 : }
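
The arithmetic above produces two extents from one: [ee_block, split) keeps
the original start, and [split, ee_block + ee_len) starts at the split point
with a correspondingly shifted physical block. A minimal userspace sketch of
that length arithmetic, using plain integers instead of the on-disk
little-endian fields (illustration only, not kernel code):

	#include <assert.h>
	#include <stdio.h>

	struct toy_extent { unsigned int block; unsigned short len; };

	/* Split [ex.block, ex.block + ex.len) at "split" into two halves. */
	static void split_at(struct toy_extent ex, unsigned int split,
			     struct toy_extent *first, struct toy_extent *second)
	{
		assert(split > ex.block && split < ex.block + ex.len);
		first->block = ex.block;
		first->len = (unsigned short)(split - ex.block);
		second->block = split;
		second->len = (unsigned short)(ex.block + ex.len - split);
	}

	int main(void)
	{
		struct toy_extent ex = { 100, 50 }, a, b;

		split_at(ex, 120, &a, &b);
		printf("first [%u,%u) second [%u,%u)\n",
		       a.block, a.block + a.len, b.block, b.block + b.len);
		/* prints: first [100,120) second [120,150) */
		return 0;
	}
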
3312 :
3313 : /*
3314 : * ext4_split_extent() splits an extent and marks the extent covered
3315 : * by @map as @split_flag indicates.
3316 : *
3317 : * It may result in splitting the extent into multiple extents (up to three).
3318 : * There are three possibilities:
3319 : * a> No split is required
3320 : * b> Splits into two extents: the split happens at either end of the extent
3321 : * c> Splits into three extents: someone is splitting in the middle of the extent
3322 : *
3323 : */
3324 460462 : static int ext4_split_extent(handle_t *handle,
3325 : struct inode *inode,
3326 : struct ext4_ext_path **ppath,
3327 : struct ext4_map_blocks *map,
3328 : int split_flag,
3329 : int flags)
3330 : {
3331 460462 : struct ext4_ext_path *path = *ppath;
3332 460462 : ext4_lblk_t ee_block;
3333 460462 : struct ext4_extent *ex;
3334 460462 : unsigned int ee_len, depth;
3335 460462 : int err = 0;
3336 460462 : int unwritten;
3337 460462 : int split_flag1, flags1;
3338 460462 : int allocated = map->m_len;
3339 :
3340 460462 : depth = ext_depth(inode);
3341 460462 : ex = path[depth].p_ext;
3342 460462 : ee_block = le32_to_cpu(ex->ee_block);
3343 460462 : ee_len = ext4_ext_get_actual_len(ex);
3344 460462 : unwritten = ext4_ext_is_unwritten(ex);
3345 :
3346 460462 : if (map->m_lblk + map->m_len < ee_block + ee_len) {
3347 228876 : split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
3348 228876 : flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
3349 228876 : if (unwritten)
3350 200138 : split_flag1 |= EXT4_EXT_MARK_UNWRIT1 |
3351 : EXT4_EXT_MARK_UNWRIT2;
3352 228876 : if (split_flag & EXT4_EXT_DATA_VALID2)
3353 200138 : split_flag1 |= EXT4_EXT_DATA_VALID1;
3354 228876 : err = ext4_split_extent_at(handle, inode, ppath,
3355 : map->m_lblk + map->m_len, split_flag1, flags1);
3356 228876 : if (err)
3357 0 : goto out;
3358 : } else {
3359 231586 : allocated = ee_len - (map->m_lblk - ee_block);
3360 : }
3361 : /*
3362 : * An updated path is required because the previous ext4_split_extent_at()
3363 : * may have split the original leaf or zeroed out the extent.
3364 : */
3365 460462 : path = ext4_find_extent(inode, map->m_lblk, ppath, flags);
3366 460463 : if (IS_ERR(path))
3367 0 : return PTR_ERR(path);
3368 460463 : depth = ext_depth(inode);
3369 460463 : ex = path[depth].p_ext;
3370 460463 : if (!ex) {
3371 0 : EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3372 : (unsigned long) map->m_lblk);
3373 0 : return -EFSCORRUPTED;
3374 : }
3375 460463 : unwritten = ext4_ext_is_unwritten(ex);
3376 :
3377 460463 : if (map->m_lblk >= ee_block) {
3378 460463 : split_flag1 = split_flag & EXT4_EXT_DATA_VALID2;
3379 460463 : if (unwritten) {
3380 406948 : split_flag1 |= EXT4_EXT_MARK_UNWRIT1;
3381 406948 : split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
3382 : EXT4_EXT_MARK_UNWRIT2);
3383 : }
3384 460463 : err = ext4_split_extent_at(handle, inode, ppath,
3385 : map->m_lblk, split_flag1, flags);
3386 460464 : if (err)
3387 0 : goto out;
3388 : }
3389 :
3390 460464 : ext4_ext_show_leaf(inode, path);
3391 460464 : out:
3392 460464 : return err ? err : allocated;
3393 : }
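
Note that ext4_split_extent() calls ext4_split_extent_at() at most twice:
first at map->m_lblk + map->m_len when the mapped range ends inside the
extent, then at map->m_lblk when it begins inside it. A small sketch of how
the position of the mapped range determines the number of resulting pieces
(hypothetical helper, illustration only):

	/* How many pieces result from splitting [m_lblk, m_lblk + m_len)
	 * out of the extent [ee_block, ee_block + ee_len)? */
	static int split_piece_count(unsigned int ee_block, unsigned int ee_len,
				     unsigned int m_lblk, unsigned int m_len)
	{
		int pieces = 1;

		if (m_lblk + m_len < ee_block + ee_len)
			pieces++;	/* tail beyond the mapped range */
		if (m_lblk > ee_block)
			pieces++;	/* head before the mapped range */
		return pieces;		/* 1, 2, or 3: cases a, b, c above */
	}
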
3394 :
3395 : /*
3396 : * This function is called by ext4_ext_map_blocks() if someone tries to write
3397 : * to an unwritten extent. It may result in splitting the unwritten
3398 : * extent into multiple extents (up to three - one initialized and two
3399 : * unwritten).
3400 : * There are three possibilities:
3401 : * a> No split required: the entire extent should be initialized
3402 : * b> Splits into two extents: the write happens at either end of the extent
3403 : * c> Splits into three extents: someone is writing in the middle of the extent
3404 : *
3405 : * Pre-conditions:
3406 : * - The extent pointed to by 'path' is unwritten.
3407 : * - The extent pointed to by 'path' contains a superset
3408 : * of the logical span [map->m_lblk, map->m_lblk + map->m_len).
3409 : *
3410 : * Post-conditions on success:
3411 : * - the returned value is the number of blocks beyond map->m_lblk
3412 : * that are allocated and initialized.
3413 : * It is guaranteed to be >= map->m_len.
3414 : */
3415 5024 : static int ext4_ext_convert_to_initialized(handle_t *handle,
3416 : struct inode *inode,
3417 : struct ext4_map_blocks *map,
3418 : struct ext4_ext_path **ppath,
3419 : int flags)
3420 : {
3421 5024 : struct ext4_ext_path *path = *ppath;
3422 5024 : struct ext4_sb_info *sbi;
3423 5024 : struct ext4_extent_header *eh;
3424 5024 : struct ext4_map_blocks split_map;
3425 5024 : struct ext4_extent zero_ex1, zero_ex2;
3426 5024 : struct ext4_extent *ex, *abut_ex;
3427 5024 : ext4_lblk_t ee_block, eof_block;
3428 5024 : unsigned int ee_len, depth, map_len = map->m_len;
3429 5024 : int allocated = 0, max_zeroout = 0;
3430 5024 : int err = 0;
3431 5024 : int split_flag = EXT4_EXT_DATA_VALID2;
3432 :
3433 5024 : ext_debug(inode, "logical block %llu, max_blocks %u\n",
3434 : (unsigned long long)map->m_lblk, map_len);
3435 :
3436 5024 : sbi = EXT4_SB(inode->i_sb);
3437 10048 : eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
3438 5024 : >> inode->i_sb->s_blocksize_bits;
3439 5024 : if (eof_block < map->m_lblk + map_len)
3440 : eof_block = map->m_lblk + map_len;
3441 :
3442 5024 : depth = ext_depth(inode);
3443 5024 : eh = path[depth].p_hdr;
3444 5024 : ex = path[depth].p_ext;
3445 5024 : ee_block = le32_to_cpu(ex->ee_block);
3446 5024 : ee_len = ext4_ext_get_actual_len(ex);
3447 5024 : zero_ex1.ee_len = 0;
3448 5024 : zero_ex2.ee_len = 0;
3449 :
3450 5024 : trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
3451 :
3452 : /* Pre-conditions */
3453 5024 : BUG_ON(!ext4_ext_is_unwritten(ex));
3454 5024 : BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));
3455 :
3456 : /*
3457 : * Attempt to transfer newly initialized blocks from the currently
3458 : * unwritten extent to its neighbor. This is much cheaper
3459 : * than an insertion followed by a merge as those involve costly
3460 : * memmove() calls. Transferring to the left is the common case in
3461 : * steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE)
3462 : * followed by append writes.
3463 : *
3464 : * Limitations of the current logic:
3465 : * - L1: we do not deal with writes covering the whole extent.
3466 : * This would require removing the extent if the transfer
3467 : * is possible.
3468 : * - L2: we only attempt to merge with an extent stored in the
3469 : * same extent tree node.
3470 : */
3471 5024 : if ((map->m_lblk == ee_block) &&
3472 : /* See if we can merge left */
3473 1649 : (map_len < ee_len) && /*L1*/
3474 1649 : (ex > EXT_FIRST_EXTENT(eh))) { /*L2*/
3475 1647 : ext4_lblk_t prev_lblk;
3476 1647 : ext4_fsblk_t prev_pblk, ee_pblk;
3477 1647 : unsigned int prev_len;
3478 :
3479 1647 : abut_ex = ex - 1;
3480 1647 : prev_lblk = le32_to_cpu(abut_ex->ee_block);
3481 1647 : prev_len = ext4_ext_get_actual_len(abut_ex);
3482 1647 : prev_pblk = ext4_ext_pblock(abut_ex);
3483 1647 : ee_pblk = ext4_ext_pblock(ex);
3484 :
3485 : /*
3486 : * A transfer of blocks from 'ex' to 'abut_ex' is allowed
3487 : * under these conditions:
3488 : * - C1: abut_ex is initialized,
3489 : * - C2: abut_ex is logically abutting ex,
3490 : * - C3: abut_ex is physically abutting ex,
3491 : * - C4: abut_ex can receive the additional blocks without
3492 : * overflowing the (initialized) length limit.
3493 : */
3494 1647 : if ((!ext4_ext_is_unwritten(abut_ex)) && /*C1*/
3495 1418 : ((prev_lblk + prev_len) == ee_block) && /*C2*/
3496 1377 : ((prev_pblk + prev_len) == ee_pblk) && /*C3*/
3497 353 : (prev_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/
3498 353 : err = ext4_ext_get_access(handle, inode, path + depth);
3499 353 : if (err)
3500 0 : goto out;
3501 :
3502 353 : trace_ext4_ext_convert_to_initialized_fastpath(inode,
3503 : map, ex, abut_ex);
3504 :
3505 : /* Shift the start of ex by 'map_len' blocks */
3506 353 : ex->ee_block = cpu_to_le32(ee_block + map_len);
3507 353 : ext4_ext_store_pblock(ex, ee_pblk + map_len);
3508 353 : ex->ee_len = cpu_to_le16(ee_len - map_len);
3509 353 : ext4_ext_mark_unwritten(ex); /* Restore the flag */
3510 :
3511 : /* Extend abut_ex by 'map_len' blocks */
3512 353 : abut_ex->ee_len = cpu_to_le16(prev_len + map_len);
3513 :
3514 : /* Result: number of initialized blocks past m_lblk */
3515 353 : allocated = map_len;
3516 : }
3517 3377 : } else if (((map->m_lblk + map_len) == (ee_block + ee_len)) &&
3518 976 : (map_len < ee_len) && /*L1*/
3519 976 : ex < EXT_LAST_EXTENT(eh)) { /*L2*/
3520 : /* See if we can merge right */
3521 203 : ext4_lblk_t next_lblk;
3522 203 : ext4_fsblk_t next_pblk, ee_pblk;
3523 203 : unsigned int next_len;
3524 :
3525 203 : abut_ex = ex + 1;
3526 203 : next_lblk = le32_to_cpu(abut_ex->ee_block);
3527 203 : next_len = ext4_ext_get_actual_len(abut_ex);
3528 203 : next_pblk = ext4_ext_pblock(abut_ex);
3529 203 : ee_pblk = ext4_ext_pblock(ex);
3530 :
3531 : /*
3532 : * A transfer of blocks from 'ex' to 'abut_ex' is allowed
3533 : * under these conditions:
3534 : * - C1: abut_ex is initialized,
3535 : * - C2: abut_ex is logically abutting ex,
3536 : * - C3: abut_ex is physically abutting ex,
3537 : * - C4: abut_ex can receive the additional blocks without
3538 : * overflowing the (initialized) length limit.
3539 : */
3540 203 : if ((!ext4_ext_is_unwritten(abut_ex)) && /*C1*/
3541 0 : ((map->m_lblk + map_len) == next_lblk) && /*C2*/
3542 0 : ((ee_pblk + ee_len) == next_pblk) && /*C3*/
3543 0 : (next_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/
3544 0 : err = ext4_ext_get_access(handle, inode, path + depth);
3545 0 : if (err)
3546 0 : goto out;
3547 :
3548 0 : trace_ext4_ext_convert_to_initialized_fastpath(inode,
3549 : map, ex, abut_ex);
3550 :
3551 : /* Shift the start of abut_ex by 'map_len' blocks */
3552 0 : abut_ex->ee_block = cpu_to_le32(next_lblk - map_len);
3553 0 : ext4_ext_store_pblock(abut_ex, next_pblk - map_len);
3554 0 : ex->ee_len = cpu_to_le16(ee_len - map_len);
3555 0 : ext4_ext_mark_unwritten(ex); /* Restore the flag */
3556 :
3557 : /* Extend abut_ex by 'map_len' blocks */
3558 0 : abut_ex->ee_len = cpu_to_le16(next_len + map_len);
3559 :
3560 : /* Result: number of initialized blocks past m_lblk */
3561 0 : allocated = map_len;
3562 : }
3563 : }
3564 353 : if (allocated) {
3565 : /* Mark the block containing both extents as dirty */
3566 353 : err = ext4_ext_dirty(handle, inode, path + depth);
3567 :
3568 : /* Update path to point to the right extent */
3569 353 : path[depth].p_ext = abut_ex;
3570 353 : goto out;
3571 : } else
3572 4671 : allocated = ee_len - (map->m_lblk - ee_block);
3573 :
3574 4671 : WARN_ON(map->m_lblk < ee_block);
3575 : /*
3576 : * It is safe to convert the extent to initialized via explicit
3577 : * zeroout only if the extent is fully inside i_size or new_size.
3578 : */
3579 4671 : split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
3580 :
3581 4671 : if (EXT4_EXT_MAY_ZEROOUT & split_flag)
3582 1957 : max_zeroout = sbi->s_extent_max_zeroout_kb >>
3583 1957 : (inode->i_sb->s_blocksize_bits - 10);
3584 :
3585 : /*
3586 : * five cases:
3587 : * 1. split the extent into three extents.
3588 : * 2. split the extent into two extents, zeroout the head of the first
3589 : * extent.
3590 : * 3. split the extent into two extents, zeroout the tail of the second
3591 : * extent.
3592 : * 4. split the extent into two extents without zeroout.
3593 : * 5. no splitting needed, just possibly zeroout the head and/or the
3594 : * tail of the extent.
3595 : */
3596 4671 : split_map.m_lblk = map->m_lblk;
3597 4671 : split_map.m_len = map->m_len;
3598 :
3599 4671 : if (max_zeroout && (allocated > split_map.m_len)) {
3600 0 : if (allocated <= max_zeroout) {
3601 : /* case 3 or 5 */
3602 0 : zero_ex1.ee_block =
3603 0 : cpu_to_le32(split_map.m_lblk +
3604 : split_map.m_len);
3605 0 : zero_ex1.ee_len =
3606 0 : cpu_to_le16(allocated - split_map.m_len);
3607 0 : ext4_ext_store_pblock(&zero_ex1,
3608 0 : ext4_ext_pblock(ex) + split_map.m_lblk +
3609 0 : split_map.m_len - ee_block);
3610 0 : err = ext4_ext_zeroout(inode, &zero_ex1);
3611 0 : if (err)
3612 0 : goto fallback;
3613 0 : split_map.m_len = allocated;
3614 : }
3615 0 : if (split_map.m_lblk - ee_block + split_map.m_len <
3616 : max_zeroout) {
3617 : /* case 2 or 5 */
3618 0 : if (split_map.m_lblk != ee_block) {
3619 0 : zero_ex2.ee_block = ex->ee_block;
3620 0 : zero_ex2.ee_len = cpu_to_le16(split_map.m_lblk -
3621 : ee_block);
3622 0 : ext4_ext_store_pblock(&zero_ex2,
3623 : ext4_ext_pblock(ex));
3624 0 : err = ext4_ext_zeroout(inode, &zero_ex2);
3625 0 : if (err)
3626 0 : goto fallback;
3627 : }
3628 :
3629 0 : split_map.m_len += split_map.m_lblk - ee_block;
3630 0 : split_map.m_lblk = ee_block;
3631 0 : allocated = map->m_len;
3632 : }
3633 : }
3634 :
3635 4671 : fallback:
3636 4671 : err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag,
3637 : flags);
3638 4671 : if (err > 0)
3639 : err = 0;
3640 0 : out:
3641 : /* If we have gotten a failure, don't zero out status tree */
3642 353 : if (!err) {
3643 5024 : ext4_zeroout_es(inode, &zero_ex1);
3644 5024 : ext4_zeroout_es(inode, &zero_ex2);
3645 : }
3646 5024 : return err ? err : allocated;
3647 : }
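
The max_zeroout computation above converts the s_extent_max_zeroout_kb
tunable from KiB to filesystem blocks: one block is 2^(blocksize_bits - 10)
KiB, hence the right shift. A worked example, assuming a tunable value of
32 KiB and 4 KiB blocks (both assumed values, not guaranteed by this code):

	static unsigned int max_zeroout_blocks(unsigned int max_zeroout_kb,
					       unsigned int blocksize_bits)
	{
		/* KiB -> blocks: one block is 2^(blocksize_bits - 10) KiB */
		return max_zeroout_kb >> (blocksize_bits - 10);
	}

	/* max_zeroout_blocks(32, 12) == 8: unwritten ranges of up to 8
	 * blocks may be zeroed out rather than split, trading some extra
	 * writes for less extent-tree fragmentation. */
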
3648 :
3649 : /*
3650 : * This function is called by ext4_ext_map_blocks() from
3651 : * ext4_get_blocks_dio_write() when a DIO write targets an
3652 : * unwritten extent.
3653 : *
3654 : * Writing to an unwritten extent may result in splitting the unwritten
3655 : * extent into multiple initialized/unwritten extents (up to three).
3656 : * There are three possibilities:
3657 : * a> No split required: the entire extent should be unwritten
3658 : * b> Splits into two extents: the write happens at either end of the extent
3659 : * c> Splits into three extents: someone is writing in the middle of the extent
3660 : *
3661 : * This works the same way in the case of initialized -> unwritten conversion.
3662 : *
3663 : * One or more index blocks may be needed if the extent tree grows after
3664 : * the unwritten extent is split. To prevent ENOSPC at I/O completion
3665 : * time, we need to split the unwritten extent before the DIO is
3666 : * submitted. The unwritten extent will be split into at most three
3667 : * unwritten extents. After the I/O completes, the filled part will be
3668 : * converted to initialized by the end_io callback
3669 : * via ext4_convert_unwritten_extents().
3670 : *
3671 : * Returns the size of the unwritten extent to be written on success.
3672 : */
3673 455789 : static int ext4_split_convert_extents(handle_t *handle,
3674 : struct inode *inode,
3675 : struct ext4_map_blocks *map,
3676 : struct ext4_ext_path **ppath,
3677 : int flags)
3678 : {
3679 455789 : struct ext4_ext_path *path = *ppath;
3680 455789 : ext4_lblk_t eof_block;
3681 455789 : ext4_lblk_t ee_block;
3682 455789 : struct ext4_extent *ex;
3683 455789 : unsigned int ee_len;
3684 455789 : int split_flag = 0, depth;
3685 :
3686 455789 : ext_debug(inode, "logical block %llu, max_blocks %u\n",
3687 : (unsigned long long)map->m_lblk, map->m_len);
3688 :
3689 911578 : eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
3690 455789 : >> inode->i_sb->s_blocksize_bits;
3691 455789 : if (eof_block < map->m_lblk + map->m_len)
3692 : eof_block = map->m_lblk + map->m_len;
3693 : /*
3694 : * It is safe to convert the extent to initialized via explicit
3695 : * zeroout only if the extent is fully inside i_size or new_size.
3696 : */
3697 455789 : depth = ext_depth(inode);
3698 455789 : ex = path[depth].p_ext;
3699 455789 : ee_block = le32_to_cpu(ex->ee_block);
3700 455789 : ee_len = ext4_ext_get_actual_len(ex);
3701 :
3702 : /* Convert to unwritten */
3703 455789 : if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) {
3704 : split_flag |= EXT4_EXT_DATA_VALID1;
3705 : /* Convert to initialized */
3706 402273 : } else if (flags & EXT4_GET_BLOCKS_CONVERT) {
3707 402273 : split_flag |= ee_block + ee_len <= eof_block ?
3708 402273 : EXT4_EXT_MAY_ZEROOUT : 0;
3709 402273 : split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2);
3710 : }
3711 455789 : flags |= EXT4_GET_BLOCKS_PRE_IO;
3712 455789 : return ext4_split_extent(handle, inode, ppath, map, split_flag, flags);
3713 : }
3714 :
3715 1598001 : static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3716 : struct inode *inode,
3717 : struct ext4_map_blocks *map,
3718 : struct ext4_ext_path **ppath)
3719 : {
3720 1598001 : struct ext4_ext_path *path = *ppath;
3721 1598001 : struct ext4_extent *ex;
3722 1598001 : ext4_lblk_t ee_block;
3723 1598001 : unsigned int ee_len;
3724 1598001 : int depth;
3725 1598001 : int err = 0;
3726 :
3727 1598001 : depth = ext_depth(inode);
3728 1598001 : ex = path[depth].p_ext;
3729 1598001 : ee_block = le32_to_cpu(ex->ee_block);
3730 1598001 : ee_len = ext4_ext_get_actual_len(ex);
3731 :
3732 1598001 : ext_debug(inode, "logical block %llu, max_blocks %u\n",
3733 : (unsigned long long)ee_block, ee_len);
3734 :
3735 : /* If the extent is larger than requested, it is a clear sign that we
3736 : * still have some extent state machine issues left, so an extent split
3737 : * is still required.
3738 : * TODO: once all related issues are fixed, this situation should be
3739 : * illegal.
3740 : */
3741 1598001 : if (ee_block != map->m_lblk || ee_len > map->m_len) {
3742 : #ifdef CONFIG_EXT4_DEBUG
3743 31 : ext4_warning(inode->i_sb, "Inode (%ld) finished: extent logical block %llu,"
3744 : " len %u; IO logical block %llu, len %u",
3745 : inode->i_ino, (unsigned long long)ee_block, ee_len,
3746 : (unsigned long long)map->m_lblk, map->m_len);
3747 : #endif
3748 28 : err = ext4_split_convert_extents(handle, inode, map, ppath,
3749 : EXT4_GET_BLOCKS_CONVERT);
3750 28 : if (err < 0)
3751 : return err;
3752 28 : path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
3753 28 : if (IS_ERR(path))
3754 0 : return PTR_ERR(path);
3755 28 : depth = ext_depth(inode);
3756 28 : ex = path[depth].p_ext;
3757 : }
3758 :
3759 1597999 : err = ext4_ext_get_access(handle, inode, path + depth);
3760 1598027 : if (err)
3761 0 : goto out;
3762 : /* first mark the extent as initialized */
3763 1598027 : ext4_ext_mark_initialized(ex);
3764 :
3765 : /* note: ext4_ext_correct_indexes() isn't needed here because
3766 : * borders are not changed
3767 : */
3768 1598027 : ext4_ext_try_to_merge(handle, inode, path, ex);
3769 :
3770 : /* Mark modified extent as dirty */
3771 1598017 : err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3772 : out:
3773 : ext4_ext_show_leaf(inode, path);
3774 : return err;
3775 : }
3776 :
3777 : static int
3778 121357 : convert_initialized_extent(handle_t *handle, struct inode *inode,
3779 : struct ext4_map_blocks *map,
3780 : struct ext4_ext_path **ppath,
3781 : unsigned int *allocated)
3782 : {
3783 121357 : struct ext4_ext_path *path = *ppath;
3784 121357 : struct ext4_extent *ex;
3785 121357 : ext4_lblk_t ee_block;
3786 121357 : unsigned int ee_len;
3787 121357 : int depth;
3788 121357 : int err = 0;
3789 :
3790 : /*
3791 : * Make sure that the extent is no bigger than the maximum we
3792 : * support for an unwritten extent
3793 : */
3794 121357 : if (map->m_len > EXT_UNWRITTEN_MAX_LEN)
3795 0 : map->m_len = EXT_UNWRITTEN_MAX_LEN / 2;
3796 :
3797 121357 : depth = ext_depth(inode);
3798 121357 : ex = path[depth].p_ext;
3799 121357 : ee_block = le32_to_cpu(ex->ee_block);
3800 121357 : ee_len = ext4_ext_get_actual_len(ex);
3801 :
3802 121357 : ext_debug(inode, "logical block %llu, max_blocks %u\n",
3803 : (unsigned long long)ee_block, ee_len);
3804 :
3805 121357 : if (ee_block != map->m_lblk || ee_len > map->m_len) {
3806 53516 : err = ext4_split_convert_extents(handle, inode, map, ppath,
3807 : EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
3808 53516 : if (err < 0)
3809 : return err;
3810 53516 : path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
3811 53516 : if (IS_ERR(path))
3812 0 : return PTR_ERR(path);
3813 53516 : depth = ext_depth(inode);
3814 53516 : ex = path[depth].p_ext;
3815 53516 : if (!ex) {
3816 0 : EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3817 : (unsigned long) map->m_lblk);
3818 0 : return -EFSCORRUPTED;
3819 : }
3820 : }
3821 :
3822 121357 : err = ext4_ext_get_access(handle, inode, path + depth);
3823 121357 : if (err)
3824 : return err;
3825 : /* first mark the extent as unwritten */
3826 121357 : ext4_ext_mark_unwritten(ex);
3827 :
3828 : /* note: ext4_ext_correct_indexes() isn't needed here because
3829 : * borders are not changed
3830 : */
3831 121357 : ext4_ext_try_to_merge(handle, inode, path, ex);
3832 :
3833 : /* Mark modified extent as dirty */
3834 121357 : err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3835 121357 : if (err)
3836 : return err;
3837 121357 : ext4_ext_show_leaf(inode, path);
3838 :
3839 121357 : ext4_update_inode_fsync_trans(handle, inode, 1);
3840 :
3841 121357 : map->m_flags |= EXT4_MAP_UNWRITTEN;
3842 121357 : if (*allocated > map->m_len)
3843 28738 : *allocated = map->m_len;
3844 121357 : map->m_len = *allocated;
3845 121357 : return 0;
3846 : }
3847 :
3848 : static int
3849 2949144 : ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
3850 : struct ext4_map_blocks *map,
3851 : struct ext4_ext_path **ppath, int flags,
3852 : unsigned int allocated, ext4_fsblk_t newblock)
3853 : {
3854 2949144 : struct ext4_ext_path __maybe_unused *path = *ppath;
3855 2949144 : int ret = 0;
3856 2949144 : int err = 0;
3857 :
3858 2949144 : ext_debug(inode, "logical block %llu, max_blocks %u, flags 0x%x, allocated %u\n",
3859 : (unsigned long long)map->m_lblk, map->m_len, flags,
3860 : allocated);
3861 2949144 : ext4_ext_show_leaf(inode, path);
3862 :
3863 : /*
3864 : * When writing into unwritten space, we should not fail to
3865 : * allocate metadata blocks for the new extent block if needed.
3866 : */
3867 2949144 : flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL;
3868 :
3869 2949144 : trace_ext4_ext_handle_unwritten_extents(inode, map, flags,
3870 : allocated, newblock);
3871 :
3872 : /* get_block() before submitting IO, split the extent */
3873 2949161 : if (flags & EXT4_GET_BLOCKS_PRE_IO) {
3874 402247 : ret = ext4_split_convert_extents(handle, inode, map, ppath,
3875 : flags | EXT4_GET_BLOCKS_CONVERT);
3876 402248 : if (ret < 0) {
3877 0 : err = ret;
3878 0 : goto out2;
3879 : }
3880 : /*
3881 : * shouldn't get a 0 return when splitting an extent unless
3882 : * m_len is 0 (bug) or extent has been corrupted
3883 : */
3884 402248 : if (unlikely(ret == 0)) {
3885 0 : EXT4_ERROR_INODE(inode,
3886 : "unexpected ret == 0, m_len = %u",
3887 : map->m_len);
3888 0 : err = -EFSCORRUPTED;
3889 0 : goto out2;
3890 : }
3891 402248 : map->m_flags |= EXT4_MAP_UNWRITTEN;
3892 402248 : goto out;
3893 : }
3894 : /* IO end_io complete, convert the filled extent to written */
3895 2546914 : if (flags & EXT4_GET_BLOCKS_CONVERT) {
3896 1598008 : err = ext4_convert_unwritten_extents_endio(handle, inode, map,
3897 : ppath);
3898 1598008 : if (err < 0)
3899 0 : goto out2;
3900 1598008 : ext4_update_inode_fsync_trans(handle, inode, 1);
3901 1597991 : goto map_out;
3902 : }
3903 : /* buffered IO cases */
3904 : /*
3905 : * repeat fallocate creation request
3906 : * we already have an unwritten extent
3907 : */
3908 948906 : if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {
3909 556314 : map->m_flags |= EXT4_MAP_UNWRITTEN;
3910 556314 : goto map_out;
3911 : }
3912 :
3913 : /* buffered READ or buffered write_begin() lookup */
3914 392592 : if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
3915 : /*
3916 : * We have blocks reserved already. We
3917 : * return allocated blocks so that delalloc
3918 : * won't do block reservation for us. But
3919 : * the buffer head will be unmapped so that
3920 : * a read from the block returns 0s.
3921 : */
3922 387568 : map->m_flags |= EXT4_MAP_UNWRITTEN;
3923 387568 : goto out1;
3924 : }
3925 :
3926 : /*
3927 : * Default case when (flags & EXT4_GET_BLOCKS_CREATE) == 1.
3928 : * For buffered writes, at writepage time, etc. Convert a
3929 : * discovered unwritten extent to written.
3930 : */
3931 5024 : ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags);
3932 5024 : if (ret < 0) {
3933 0 : err = ret;
3934 0 : goto out2;
3935 : }
3936 5024 : ext4_update_inode_fsync_trans(handle, inode, 1);
3937 : /*
3938 : * shouldn't get a 0 return when converting an unwritten extent
3939 : * unless m_len is 0 (bug) or extent has been corrupted
3940 : */
3941 5024 : if (unlikely(ret == 0)) {
3942 0 : EXT4_ERROR_INODE(inode, "unexpected ret == 0, m_len = %u",
3943 : map->m_len);
3944 0 : err = -EFSCORRUPTED;
3945 0 : goto out2;
3946 : }
3947 :
3948 5024 : out:
3949 407272 : allocated = ret;
3950 407272 : map->m_flags |= EXT4_MAP_NEW;
3951 2561577 : map_out:
3952 2561577 : map->m_flags |= EXT4_MAP_MAPPED;
3953 2949145 : out1:
3954 2949145 : map->m_pblk = newblock;
3955 2949145 : if (allocated > map->m_len)
3956 : allocated = map->m_len;
3957 2949145 : map->m_len = allocated;
3958 2949145 : ext4_ext_show_leaf(inode, path);
3959 2949145 : out2:
3960 2949145 : return err ? err : allocated;
3961 : }
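
The function above is essentially a dispatcher on the get_blocks flags, and
the order of the tests matters: PRE_IO is handled before CONVERT, and the
lookup-only case before the default conversion. A hypothetical helper
summarizing that decision order with the flags reduced to booleans
(illustration only, not kernel code):

	static const char *unwritten_action(int pre_io, int convert,
					    int unwrit_ext, int create)
	{
		if (pre_io)		/* EXT4_GET_BLOCKS_PRE_IO */
			return "split the extent before submitting I/O";
		if (convert)		/* EXT4_GET_BLOCKS_CONVERT */
			return "end_io: convert the filled extent to written";
		if (unwrit_ext)		/* EXT4_GET_BLOCKS_UNWRIT_EXT */
			return "repeat fallocate: extent stays unwritten";
		if (!create)		/* buffered read / write_begin lookup */
			return "report unwritten; leave the buffer unmapped";
		return "buffered write: convert the extent to initialized";
	}
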
3962 :
3963 : /*
3964 : * get_implied_cluster_alloc - check to see if the requested
3965 : * allocation (in the map structure) overlaps with a cluster already
3966 : * allocated in an extent.
3967 : * @sb The filesystem superblock structure
3968 : * @map The requested lblk->pblk mapping
3969 : * @ex The extent structure which might contain an implied
3970 : * cluster allocation
3971 : *
3972 : * This function is called by ext4_ext_map_blocks() after we failed to
3973 : * find blocks that were already in the inode's extent tree. Hence,
3974 : * we know that the beginning of the requested region cannot overlap
3975 : * the extent from the inode's extent tree. There are three cases we
3976 : * want to catch. The first is this case:
3977 : *
3978 : * |--- cluster # N--|
3979 : * |--- extent ---| |---- requested region ---|
3980 : * |==========|
3981 : *
3982 : * The second case that we need to test for is this one:
3983 : *
3984 : * |--------- cluster # N ----------------|
3985 : * |--- requested region --| |------- extent ----|
3986 : * |=======================|
3987 : *
3988 : * The third case is when the requested region lies between two extents
3989 : * within the same cluster:
3990 : * |------------- cluster # N-------------|
3991 : * |----- ex -----| |---- ex_right ----|
3992 : * |------ requested region ------|
3993 : * |================|
3994 : *
3995 : * In each of the above cases, we need to set map->m_pblk and
3996 : * map->m_len so they correspond to the extent labelled as
3997 : * "|====|" from cluster #N, since it is already in use for data in
3998 : * cluster EXT4_B2C(sbi, map->m_lblk). We will then return 1 to
3999 : * signal to ext4_ext_map_blocks() that map->m_pblk should be treated
4000 : * as a new "allocated" block region. Otherwise, we will return 0 and
4001 : * ext4_ext_map_blocks() will then allocate one or more new clusters
4002 : * by calling ext4_mb_new_blocks().
4003 : */
4004 1 : static int get_implied_cluster_alloc(struct super_block *sb,
4005 : struct ext4_map_blocks *map,
4006 : struct ext4_extent *ex,
4007 : struct ext4_ext_path *path)
4008 : {
4009 1 : struct ext4_sb_info *sbi = EXT4_SB(sb);
4010 1 : ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
4011 1 : ext4_lblk_t ex_cluster_start, ex_cluster_end;
4012 1 : ext4_lblk_t rr_cluster_start;
4013 1 : ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
4014 1 : ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
4015 1 : unsigned short ee_len = ext4_ext_get_actual_len(ex);
4016 :
4017 : /* The extent passed in that we are trying to match */
4018 1 : ex_cluster_start = EXT4_B2C(sbi, ee_block);
4019 1 : ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1);
4020 :
4021 : /* The requested region passed into ext4_map_blocks() */
4022 1 : rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
4023 :
4024 1 : if ((rr_cluster_start == ex_cluster_end) ||
4025 1 : (rr_cluster_start == ex_cluster_start)) {
4026 1 : if (rr_cluster_start == ex_cluster_end)
4027 1 : ee_start += ee_len - 1;
4028 1 : map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset;
4029 1 : map->m_len = min(map->m_len,
4030 : (unsigned) sbi->s_cluster_ratio - c_offset);
4031 : /*
4032 : * Check for and handle this case:
4033 : *
4034 : * |--------- cluster # N-------------|
4035 : * |------- extent ----|
4036 : * |--- requested region ---|
4037 : * |===========|
4038 : */
4039 :
4040 1 : if (map->m_lblk < ee_block)
4041 0 : map->m_len = min(map->m_len, ee_block - map->m_lblk);
4042 :
4043 : /*
4044 : * Check for the case where there is already another allocated
4045 : * block to the right of 'ex' but before the end of the cluster.
4046 : *
4047 : * |------------- cluster # N-------------|
4048 : * |----- ex -----| |---- ex_right ----|
4049 : * |------ requested region ------|
4050 : * |================|
4051 : */
4052 1 : if (map->m_lblk > ee_block) {
4053 1 : ext4_lblk_t next = ext4_ext_next_allocated_block(path);
4054 1 : map->m_len = min(map->m_len, next - map->m_lblk);
4055 : }
4056 :
4057 1 : trace_ext4_get_implied_cluster_alloc_exit(sb, map, 1);
4058 1 : return 1;
4059 : }
4060 :
4061 0 : trace_ext4_get_implied_cluster_alloc_exit(sb, map, 0);
4062 0 : return 0;
4063 : }
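
The cluster arithmetic above comes down to two helpers: EXT4_B2C() shifts a
block number down by s_cluster_bits to get its cluster, and EXT4_LBLK_COFF()
masks out the offset within the cluster. A minimal model, assuming a bigalloc
cluster ratio of 16 blocks per cluster (the macros below are stand-ins, not
the real kernel definitions):

	#define CLUSTER_BITS	4
	#define CLUSTER_RATIO	(1u << CLUSTER_BITS)

	static unsigned int b2c(unsigned int blk)	/* models EXT4_B2C() */
	{
		return blk >> CLUSTER_BITS;
	}

	static unsigned int lblk_coff(unsigned int blk)	/* models EXT4_LBLK_COFF() */
	{
		return blk & (CLUSTER_RATIO - 1);
	}

	/* Example: extent [35, 40) and a request at block 42 both fall in
	 * cluster 2, i.e. blocks [32, 48): b2c(39) == b2c(42) == 2, so the
	 * request can reuse the already-allocated cluster (the first case
	 * in the comment above). */
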
4064 :
4065 :
4066 : /*
4067 : * Block allocation/map/preallocation routine for extent-based files
4068 : *
4069 : *
4070 : * Needs to be called with
4071 : * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system blocks
4072 : * (i.e., create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
4073 : *
4074 : * return > 0, number of blocks already mapped/allocated;
4075 : * if create == 0 and these are pre-allocated blocks,
4076 : * the buffer head is unmapped;
4077 : * otherwise blocks are mapped
4078 : *
4079 : * return = 0, if plain lookup failed (blocks have not been allocated);
4080 : * the buffer head is unmapped
4081 : *
4082 : * return < 0, error case.
4083 : */
4084 11400187 : int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4085 : struct ext4_map_blocks *map, int flags)
4086 : {
4087 11400187 : struct ext4_ext_path *path = NULL;
4088 11400187 : struct ext4_extent newex, *ex, ex2;
4089 11400187 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
4090 11400187 : ext4_fsblk_t newblock = 0, pblk;
4091 11400187 : int err = 0, depth, ret;
4092 11400187 : unsigned int allocated = 0, offset = 0;
4093 11400187 : unsigned int allocated_clusters = 0;
4094 11400187 : struct ext4_allocation_request ar;
4095 11400187 : ext4_lblk_t cluster_offset;
4096 :
4097 11400187 : ext_debug(inode, "blocks %u/%u requested\n", map->m_lblk, map->m_len);
4098 11400187 : trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
4099 :
4100 : /* find extent for this block */
4101 11393089 : path = ext4_find_extent(inode, map->m_lblk, NULL, 0);
4102 11397684 : if (IS_ERR(path)) {
4103 1 : err = PTR_ERR(path);
4104 1 : path = NULL;
4105 1 : goto out;
4106 : }
4107 :
4108 11397683 : depth = ext_depth(inode);
4109 :
4110 : /*
4111 : * a consistent leaf must not be empty;
4112 : * this situation is possible, though, _during_ tree modification;
4113 : * this is why the assert can't be put in ext4_find_extent()
4114 : */
4115 11397683 : if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
4116 0 : EXT4_ERROR_INODE(inode, "bad extent address "
4117 : "lblock: %lu, depth: %d pblock %lld",
4118 : (unsigned long) map->m_lblk, depth,
4119 : path[depth].p_block);
4120 0 : err = -EFSCORRUPTED;
4121 0 : goto out;
4122 : }
4123 :
4124 11397683 : ex = path[depth].p_ext;
4125 11397683 : if (ex) {
4126 8660075 : ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
4127 8660075 : ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
4128 8660075 : unsigned short ee_len;
4129 :
4130 :
4131 : /*
4132 : * unwritten extents are treated as holes, except that
4133 : * we split out initialized portions during a write.
4134 : */
4135 8660075 : ee_len = ext4_ext_get_actual_len(ex);
4136 :
4137 8660075 : trace_ext4_ext_show_extent(inode, ee_block, ee_start, ee_len);
4138 :
4139 : /* if found extent covers block, simply return it */
4140 8660751 : if (in_range(map->m_lblk, ee_block, ee_len)) {
4141 3904162 : newblock = map->m_lblk - ee_block + ee_start;
4142 : /* number of remaining blocks in the extent */
4143 3904162 : allocated = ee_len - (map->m_lblk - ee_block);
4144 3904162 : ext_debug(inode, "%u fit into %u:%d -> %llu\n",
4145 : map->m_lblk, ee_block, ee_len, newblock);
4146 :
4147 : /*
4148 : * If the extent is initialized check whether the
4149 : * caller wants to convert it to unwritten.
4150 : */
4151 3904162 : if ((!ext4_ext_is_unwritten(ex)) &&
4152 955014 : (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
4153 121357 : err = convert_initialized_extent(handle,
4154 : inode, map, &path, &allocated);
4155 121357 : goto out;
4156 3782805 : } else if (!ext4_ext_is_unwritten(ex)) {
4157 833657 : map->m_flags |= EXT4_MAP_MAPPED;
4158 833657 : map->m_pblk = newblock;
4159 833657 : if (allocated > map->m_len)
4160 193252 : allocated = map->m_len;
4161 833657 : map->m_len = allocated;
4162 833657 : ext4_ext_show_leaf(inode, path);
4163 833657 : goto out;
4164 : }
4165 :
4166 2949148 : ret = ext4_ext_handle_unwritten_extents(
4167 : handle, inode, map, &path, flags,
4168 : allocated, newblock);
4169 2949120 : if (ret < 0)
4170 0 : err = ret;
4171 : else
4172 2949120 : allocated = ret;
4173 2949120 : goto out;
4174 : }
4175 : }
4176 :
4177 : /*
4178 : * the requested block isn't allocated yet;
4179 : * we can't try to create blocks if the create flag is zero
4180 : */
4181 7494197 : if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
4182 3498725 : ext4_lblk_t hole_start, hole_len;
4183 :
4184 3498725 : hole_start = map->m_lblk;
4185 3498725 : hole_len = ext4_ext_determine_hole(inode, path, &hole_start);
4186 : /*
4187 : * put the just-found gap into the cache to speed up
4188 : * subsequent requests
4189 : */
4190 3493623 : ext4_ext_put_gap_in_cache(inode, hole_start, hole_len);
4191 :
4192 : /* Update hole_len to reflect hole size after map->m_lblk */
4193 3496399 : if (hole_start != map->m_lblk)
4194 1517714 : hole_len -= map->m_lblk - hole_start;
4195 3496399 : map->m_pblk = 0;
4196 3496399 : map->m_len = min_t(unsigned int, map->m_len, hole_len);
4197 :
4198 3496399 : goto out;
4199 : }
4200 :
4201 : /*
4202 : * Okay, we need to do block allocation.
4203 : */
4204 3995472 : newex.ee_block = cpu_to_le32(map->m_lblk);
4205 3995472 : cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
4206 :
4207 : /*
4208 : * If we are doing bigalloc, check to see if the extent returned
4209 : * by ext4_find_extent() implies a cluster we can use.
4210 : */
4211 3995473 : if (cluster_offset && ex &&
4212 1 : get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
4213 1 : ar.len = allocated = map->m_len;
4214 1 : newblock = map->m_pblk;
4215 1 : goto got_allocated_blocks;
4216 : }
4217 :
4218 : /* find neighbour allocated blocks */
4219 3995471 : ar.lleft = map->m_lblk;
4220 3995471 : err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
4221 3994595 : if (err)
4222 0 : goto out;
4223 3994595 : ar.lright = map->m_lblk;
4224 3994595 : err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2);
4225 3993841 : if (err < 0)
4226 0 : goto out;
4227 :
4228 : /* Check if the extent after searching to the right implies a
4229 : * cluster we can use. */
4230 3993841 : if ((sbi->s_cluster_ratio > 1) && err &&
4231 0 : get_implied_cluster_alloc(inode->i_sb, map, &ex2, path)) {
4232 0 : ar.len = allocated = map->m_len;
4233 0 : newblock = map->m_pblk;
4234 0 : goto got_allocated_blocks;
4235 : }
4236 :
4237 : /*
4238 : * See if the request is beyond the maximum number of blocks we can have in
4239 : * a single extent. For an initialized extent this limit is
4240 : * EXT_INIT_MAX_LEN and for an unwritten extent this limit is
4241 : * EXT_UNWRITTEN_MAX_LEN.
4242 : */
4243 3993841 : if (map->m_len > EXT_INIT_MAX_LEN &&
4244 450958 : !(flags & EXT4_GET_BLOCKS_UNWRIT_EXT))
4245 0 : map->m_len = EXT_INIT_MAX_LEN;
4246 3993841 : else if (map->m_len > EXT_UNWRITTEN_MAX_LEN &&
4247 450994 : (flags & EXT4_GET_BLOCKS_UNWRIT_EXT))
4248 450994 : map->m_len = EXT_UNWRITTEN_MAX_LEN;
4249 :
4250 : /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
4251 3993841 : newex.ee_len = cpu_to_le16(map->m_len);
4252 3993841 : err = ext4_ext_check_overlap(sbi, inode, &newex, path);
4253 3993958 : if (err)
4254 0 : allocated = ext4_ext_get_actual_len(&newex);
4255 : else
4256 3993958 : allocated = map->m_len;
4257 :
4258 : /* allocate new block */
4259 3993958 : ar.inode = inode;
4260 3993958 : ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
4261 3993082 : ar.logical = map->m_lblk;
4262 : /*
4263 : * We calculate the offset from the beginning of the cluster
4264 : * for the logical block number, since when we allocate a
4265 : * physical cluster, the physical block should start at the
4266 : * same offset from the beginning of the cluster. This is
4267 : * needed so that future calls to get_implied_cluster_alloc()
4268 : * work correctly.
4269 : */
4270 3993082 : offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
4271 3993082 : ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
4272 3993082 : ar.goal -= offset;
4273 3993082 : ar.logical -= offset;
4274 3993082 : if (S_ISREG(inode->i_mode))
4275 3569677 : ar.flags = EXT4_MB_HINT_DATA;
4276 : else
4277 : /* disable in-core preallocation for non-regular files */
4278 423405 : ar.flags = 0;
4279 3993082 : if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
4280 651114 : ar.flags |= EXT4_MB_HINT_NOPREALLOC;
4281 3993082 : if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
4282 872370 : ar.flags |= EXT4_MB_DELALLOC_RESERVED;
4283 3993082 : if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
4284 874281 : ar.flags |= EXT4_MB_USE_RESERVED;
4285 3993082 : newblock = ext4_mb_new_blocks(handle, &ar, &err);
4286 3996464 : if (!newblock)
4287 729050 : goto out;
4288 3267414 : allocated_clusters = ar.len;
4289 3267414 : ar.len = EXT4_C2B(sbi, ar.len) - offset;
4290 3267414 : ext_debug(inode, "allocate new block: goal %llu, found %llu/%u, requested %u\n",
4291 : ar.goal, newblock, ar.len, allocated);
4292 3267414 : if (ar.len > allocated)
4293 1 : ar.len = allocated;
4294 :
4295 3267413 : got_allocated_blocks:
4296 : /* try to insert new extent into found leaf and return */
4297 3267415 : pblk = newblock + offset;
4298 3267415 : ext4_ext_store_pblock(&newex, pblk);
4299 3267415 : newex.ee_len = cpu_to_le16(ar.len);
4300 : /* Mark unwritten */
4301 3267415 : if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {
4302 2622458 : ext4_ext_mark_unwritten(&newex);
4303 2622458 : map->m_flags |= EXT4_MAP_UNWRITTEN;
4304 : }
4305 :
4306 3267415 : err = ext4_ext_insert_extent(handle, inode, &path, &newex, flags);
4307 3267259 : if (err) {
4308 275 : if (allocated_clusters) {
4309 275 : int fb_flags = 0;
4310 :
4311 : /*
4312 : * free the data blocks we just allocated.
4313 : * calling discard here directly is not a good idea,
4314 : * but otherwise we'd need to call it on every free().
4315 : */
4316 275 : ext4_discard_preallocations(inode, 0);
4317 275 : if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
4318 3 : fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE;
4319 550 : ext4_free_blocks(handle, inode, NULL, newblock,
4320 275 : EXT4_C2B(sbi, allocated_clusters),
4321 : fb_flags);
4322 : }
4323 275 : goto out;
4324 : }
4325 :
4326 : /*
4327 : * Reduce the reserved cluster count to reflect successful deferred
4328 : * allocation of delayed allocated clusters or direct allocation of
4329 : * clusters discovered to be delayed allocated. Once allocated, a
4330 : * cluster is not included in the reserved count.
4331 : */
4332 3266984 : if (test_opt(inode->i_sb, DELALLOC) && allocated_clusters) {
4333 3262016 : if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
4334 : /*
4335 : * When allocating delayed allocated clusters, simply
4336 : * reduce the reserved cluster count and claim quota
4337 : */
4338 872278 : ext4_da_update_reserve_space(inode, allocated_clusters,
4339 : 1);
4340 : } else {
4341 2389738 : ext4_lblk_t lblk, len;
4342 2389738 : unsigned int n;
4343 :
4344 : /*
4345 : * When allocating non-delayed allocated clusters
4346 : * (from fallocate, filemap, DIO, or clusters
4347 : * allocated when delalloc has been disabled by
4348 : * ext4_nonda_switch), reduce the reserved cluster
4349 : * count by the number of allocated clusters that
4350 : * have previously been delayed allocated. Quota
4351 : * has been claimed by ext4_mb_new_blocks() above,
4352 : * so release the quota reservations made for any
4353 : * previously delayed allocated clusters.
4354 : */
4355 2389738 : lblk = EXT4_LBLK_CMASK(sbi, map->m_lblk);
4356 2389738 : len = allocated_clusters << sbi->s_cluster_bits;
4357 2389738 : n = ext4_es_delayed_clu(inode, lblk, len);
4358 2389631 : if (n > 0)
4359 37998 : ext4_da_update_reserve_space(inode, (int) n, 0);
4360 : }
4361 : }
4362 :
4363 : /*
4364 : * Cache the extent and update transaction to commit on fdatasync only
4365 : * when it is _not_ an unwritten extent.
4366 : */
4367 3266902 : if ((flags & EXT4_GET_BLOCKS_UNWRIT_EXT) == 0)
4368 644500 : ext4_update_inode_fsync_trans(handle, inode, 1);
4369 : else
4370 2622402 : ext4_update_inode_fsync_trans(handle, inode, 0);
4371 :
4372 3266633 : map->m_flags |= (EXT4_MAP_NEW | EXT4_MAP_MAPPED);
4373 3266633 : map->m_pblk = pblk;
4374 3266633 : map->m_len = ar.len;
4375 3266633 : allocated = map->m_len;
4376 11396492 : ext4_ext_show_leaf(inode, path);
4377 11396492 : out:
4378 11396492 : ext4_free_ext_path(path);
4379 :
4380 11397961 : trace_ext4_ext_map_blocks_exit(inode, flags, map,
4381 11397961 : err ? err : allocated);
4382 11392515 : return err ? err : allocated;
4383 : }
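
The mappings this function creates and returns can be inspected from
userspace with the FIEMAP ioctl, which reports each extent's logical offset,
physical offset, length, and whether it is still unwritten. A minimal sketch
with error handling trimmed (illustration only):

	#include <fcntl.h>
	#include <linux/fiemap.h>
	#include <linux/fs.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	int main(int argc, char **argv)
	{
		unsigned int i, n = 32;
		int fd = open(argv[1], O_RDONLY);
		struct fiemap *fm = calloc(1, sizeof(*fm) +
					   n * sizeof(struct fiemap_extent));

		if (fd < 0 || !fm)
			return 1;
		fm->fm_length = ~0ULL;		/* map the whole file */
		fm->fm_extent_count = n;
		if (ioctl(fd, FS_IOC_FIEMAP, fm) == 0)
			for (i = 0; i < fm->fm_mapped_extents; i++)
				printf("logical %llu physical %llu len %llu%s\n",
				       (unsigned long long)fm->fm_extents[i].fe_logical,
				       (unsigned long long)fm->fm_extents[i].fe_physical,
				       (unsigned long long)fm->fm_extents[i].fe_length,
				       (fm->fm_extents[i].fe_flags &
					FIEMAP_EXTENT_UNWRITTEN) ? " (unwritten)" : "");
		free(fm);
		close(fd);
		return 0;
	}
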
4384 :
4385 726542 : int ext4_ext_truncate(handle_t *handle, struct inode *inode)
4386 : {
4387 726542 : struct super_block *sb = inode->i_sb;
4388 726542 : ext4_lblk_t last_block;
4389 726542 : int err = 0;
4390 :
4391 : /*
4392 : * TODO: optimization is possible here.
4393 : * Probably we need not scan at all,
4394 : * because page truncation is enough.
4395 : */
4396 :
4397 : /* we have to know where to truncate from in the crash case */
4398 726542 : EXT4_I(inode)->i_disksize = inode->i_size;
4399 726542 : err = ext4_mark_inode_dirty(handle, inode);
4400 726725 : if (err)
4401 : return err;
4402 :
4403 1453450 : last_block = (inode->i_size + sb->s_blocksize - 1)
4404 726725 : >> EXT4_BLOCK_SIZE_BITS(sb);
4405 726725 : ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block);
4406 :
4407 726500 : retry_remove_space:
4408 726500 : err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
4409 726425 : if (err == -ENOMEM) {
4410 0 : memalloc_retry_wait(GFP_ATOMIC);
4411 0 : goto retry_remove_space;
4412 : }
4413 : return err;
4414 : }
4415 :
4416 1038140 : static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
4417 : ext4_lblk_t len, loff_t new_size,
4418 : int flags)
4419 : {
4420 1038140 : struct inode *inode = file_inode(file);
4421 1038140 : handle_t *handle;
4422 1038140 : int ret = 0, ret2 = 0, ret3 = 0;
4423 1038140 : int retries = 0;
4424 1038140 : int depth = 0;
4425 1038140 : struct ext4_map_blocks map;
4426 1038140 : unsigned int credits;
4427 1038140 : loff_t epos;
4428 :
4429 1038140 : BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
4430 1038140 : map.m_lblk = offset;
4431 1038140 : map.m_len = len;
4432 : /*
4433 : * Don't normalize the request if it can fit in one extent so
4434 : * that it doesn't get unnecessarily split into multiple
4435 : * extents.
4436 : */
4437 1038140 : if (len <= EXT_UNWRITTEN_MAX_LEN)
4438 1037987 : flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
4439 :
4440 : /*
4441 : * credits to insert 1 extent into extent tree
4442 : */
4443 1038140 : credits = ext4_chunk_trans_blocks(inode, len);
4444 1038138 : depth = ext_depth(inode);
4445 :
4446 : retry:
4447 3289789 : while (len) {
4448 : /*
4449 : * Recalculate credits when extent tree depth changes.
4450 : */
4451 2263476 : if (depth != ext_depth(inode)) {
4452 8414 : credits = ext4_chunk_trans_blocks(inode, len);
4453 8414 : depth = ext_depth(inode);
4454 : }
4455 :
4456 2263476 : handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4457 : credits);
4458 2263472 : if (IS_ERR(handle)) {
4459 0 : ret = PTR_ERR(handle);
4460 0 : break;
4461 : }
4462 2263472 : ret = ext4_map_blocks(handle, inode, &map, flags);
4463 2263473 : if (ret <= 0) {
4464 28063 : ext4_debug("inode #%lu: block %u: len %u: "
4465 : "ext4_ext_map_blocks returned %d",
4466 : inode->i_ino, map.m_lblk,
4467 : map.m_len, ret);
4468 28063 : ext4_mark_inode_dirty(handle, inode);
4469 28064 : ext4_journal_stop(handle);
4470 28064 : break;
4471 : }
4472 : /*
4473 : * allow a full retry cycle for any remaining allocations
4474 : */
4475 2235410 : retries = 0;
4476 2235410 : map.m_lblk += ret;
4477 2235410 : map.m_len = len = len - ret;
4478 2235410 : epos = (loff_t)map.m_lblk << inode->i_blkbits;
4479 2235410 : inode->i_ctime = current_time(inode);
4480 2235398 : if (new_size) {
4481 1081204 : if (epos > new_size)
4482 : epos = new_size;
4483 1081204 : if (ext4_update_inode_size(inode, epos) & 0x1)
4484 557563 : inode->i_mtime = inode->i_ctime;
4485 : }
4486 2235399 : ret2 = ext4_mark_inode_dirty(handle, inode);
4487 2235429 : ext4_update_inode_fsync_trans(handle, inode, 1);
4488 2235429 : ret3 = ext4_journal_stop(handle);
4489 2235421 : ret2 = ret3 ? ret3 : ret2;
4490 2235421 : if (unlikely(ret2))
4491 : break;
4492 : }
4493 1054373 : if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
4494 16230 : goto retry;
4495 :
4496 1038142 : return ret > 0 ? ret2 : ret;
4497 : }
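
The loop above allocates in chunks: each ext4_map_blocks() call may map fewer
blocks than requested, so the cursor is advanced by the return value until
the remaining length reaches zero. The pattern in outline, with a
hypothetical allocate_chunk() standing in for the journal-start / map /
journal-stop sequence:

	while (len) {
		int ret = allocate_chunk(lblk, len);	/* blocks mapped this round */

		if (ret <= 0)
			break;		/* error, or nothing could be mapped */
		lblk += ret;
		len -= ret;
	}
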
4498 :
4499 : static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len);
4500 :
4501 : static int ext4_insert_range(struct file *file, loff_t offset, loff_t len);
4502 :
4503 203855 : static long ext4_zero_range(struct file *file, loff_t offset,
4504 : loff_t len, int mode)
4505 : {
4506 203855 : struct inode *inode = file_inode(file);
4507 203855 : struct address_space *mapping = file->f_mapping;
4508 203855 : handle_t *handle = NULL;
4509 203855 : unsigned int max_blocks;
4510 203855 : loff_t new_size = 0;
4511 203855 : int ret = 0;
4512 203855 : int flags;
4513 203855 : int credits;
4514 203855 : int partial_begin, partial_end;
4515 203855 : loff_t start, end;
4516 203855 : ext4_lblk_t lblk;
4517 203855 : unsigned int blkbits = inode->i_blkbits;
4518 :
4519 203855 : trace_ext4_zero_range(inode, offset, len, mode);
4520 :
4521 : /*
4522 : * Round up offset. This is not fallocate; we need to zero out
4523 : * blocks, so convert the interior block-aligned part of the range
4524 : * to unwritten and possibly manually zero out the unaligned parts
4525 : * of the range.
4526 : */
4527 203852 : start = round_up(offset, 1 << blkbits);
4528 203852 : end = round_down((offset + len), 1 << blkbits);
4529 :
4530 203852 : if (start < offset || end > offset + len)
4531 : return -EINVAL;
4532 203852 : partial_begin = offset & ((1 << blkbits) - 1);
4533 203852 : partial_end = (offset + len) & ((1 << blkbits) - 1);
4534 :
4535 203852 : lblk = start >> blkbits;
4536 203852 : max_blocks = (end >> blkbits);
4537 203852 : if (max_blocks < lblk)
4538 : max_blocks = 0;
4539 : else
4540 199237 : max_blocks -= lblk;
4541 :
4542 203852 : inode_lock(inode);
4543 :
4544 : /*
4545 : * Indirect files do not support unwritten extents
4546 : */
4547 203856 : if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4548 17 : ret = -EOPNOTSUPP;
4549 17 : goto out_mutex;
4550 : }
4551 :
4552 203839 : if (!(mode & FALLOC_FL_KEEP_SIZE) &&
4553 100276 : (offset + len > inode->i_size ||
4554 66594 : offset + len > EXT4_I(inode)->i_disksize)) {
4555 36400 : new_size = offset + len;
4556 36400 : ret = inode_newsize_ok(inode, new_size);
4557 36400 : if (ret)
4558 0 : goto out_mutex;
4559 : }
4560 :
4561 203839 : flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
4562 :
4563 : /* Wait for all existing DIO workers; newcomers will block on i_rwsem */
4564 203839 : inode_dio_wait(inode);
4565 :
4566 203839 : ret = file_modified(file);
4567 203837 : if (ret)
4568 0 : goto out_mutex;
4569 :
4570 : /* Preallocate the range including the unaligned edges */
4571 203837 : if (partial_begin || partial_end) {
4572 200245 : ret = ext4_alloc_file_blocks(file,
4573 200245 : round_down(offset, 1 << blkbits) >> blkbits,
4574 200245 : (round_up((offset + len), 1 << blkbits) -
4575 200245 : round_down(offset, 1 << blkbits)) >> blkbits,
4576 : new_size, flags);
4577 200244 : if (ret)
4578 2841 : goto out_mutex;
4579 :
4580 : }
4581 :
4582 : /* Zero range excluding the unaligned edges */
4583 200995 : if (max_blocks > 0) {
4584 186502 : flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
4585 : EXT4_EX_NOCACHE);
4586 :
4587 : /*
4588 : * Prevent page faults from reinstantiating pages we have
4589 : * released from page cache.
4590 : */
4591 186502 : filemap_invalidate_lock(mapping);
4592 :
4593 186504 : ret = ext4_break_layouts(inode);
4594 186501 : if (ret) {
4595 0 : filemap_invalidate_unlock(mapping);
4596 0 : goto out_mutex;
4597 : }
4598 :
4599 186501 : ret = ext4_update_disksize_before_punch(inode, offset, len);
4600 186500 : if (ret) {
4601 0 : filemap_invalidate_unlock(mapping);
4602 0 : goto out_mutex;
4603 : }
4604 :
4605 : /*
4606 : * For journalled data we need to write (and checkpoint) pages
4607 : * before discarding the page cache, to avoid inconsistent data on
4608 : * disk in case of a crash before the zeroing transaction is committed.
4609 : */
4610 186500 : if (ext4_should_journal_data(inode)) {
4611 0 : ret = filemap_write_and_wait_range(mapping, start, end);
4612 0 : if (ret) {
4613 0 : filemap_invalidate_unlock(mapping);
4614 0 : goto out_mutex;
4615 : }
4616 : }
4617 :
4618 : /* Now release the pages and zero block aligned part of pages */
4619 186499 : truncate_pagecache_range(inode, start, end - 1);
4620 186500 : inode->i_mtime = inode->i_ctime = current_time(inode);
4621 :
4622 186500 : ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
4623 : flags);
4624 186505 : filemap_invalidate_unlock(mapping);
4625 186505 : if (ret)
4626 0 : goto out_mutex;
4627 : }
4628 200998 : if (!partial_begin && !partial_end)
4629 3594 : goto out_mutex;
4630 :
4631 : /*
4632 : * In the worst case we have to write out two nonadjacent unwritten
4633 : * blocks and update the inode
4634 : */
4635 197404 : credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1;
4636 197404 : if (ext4_should_journal_data(inode))
4637 0 : credits += 2;
4638 197402 : handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
4639 197403 : if (IS_ERR(handle)) {
4640 0 : ret = PTR_ERR(handle);
4641 0 : ext4_std_error(inode->i_sb, ret);
4642 0 : goto out_mutex;
4643 : }
4644 :
4645 197403 : inode->i_mtime = inode->i_ctime = current_time(inode);
4646 197403 : if (new_size)
4647 35303 : ext4_update_inode_size(inode, new_size);
4648 197403 : ret = ext4_mark_inode_dirty(handle, inode);
4649 197404 : if (unlikely(ret))
4650 0 : goto out_handle;
4651 : /* Zero out partial block at the edges of the range */
4652 197404 : ret = ext4_zero_partial_blocks(handle, inode, offset, len);
4653 197403 : if (ret >= 0)
4654 197403 : ext4_update_inode_fsync_trans(handle, inode, 1);
4655 :
4656 197403 : if (file->f_flags & O_SYNC)
4657 0 : ext4_handle_sync(handle);
4658 :
4659 197403 : out_handle:
4660 197403 : ext4_journal_stop(handle);
4661 203856 : out_mutex:
4662 203856 : inode_unlock(inode);
4663 203856 : return ret;
4664 : }
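
From userspace this path is reached via fallocate(2) with
FALLOC_FL_ZERO_RANGE: the block-aligned interior becomes unwritten extents
while the unaligned edges are zeroed explicitly. A minimal usage example
(error handling reduced to the return value):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <linux/falloc.h>

	/* Zero a byte range in place; reads of the range then return 0s. */
	int zero_range(int fd, off_t offset, off_t len)
	{
		/* OR in FALLOC_FL_KEEP_SIZE to avoid growing i_size */
		return fallocate(fd, FALLOC_FL_ZERO_RANGE, offset, len);
	}
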
4665 :
4666 : /*
4667 : * Preallocate space for a file. This implements ext4's fallocate file
4668 : * operation, which gets called from the sys_fallocate system call.
4669 : * For block-mapped files, posix_fallocate should fall back to the method
4670 : * of writing zeroes to the required new blocks (the same behavior that is
4671 : * expected of file systems that do not support the fallocate() system call).
4672 : */
4673 1454071 : long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4674 : {
4675 1454071 : struct inode *inode = file_inode(file);
4676 1454071 : loff_t new_size = 0;
4677 1454071 : unsigned int max_blocks;
4678 1454071 : int ret = 0;
4679 1454071 : int flags;
4680 1454071 : ext4_lblk_t lblk;
4681 1454071 : unsigned int blkbits = inode->i_blkbits;
4682 :
4683 : /*
4684 : * Encrypted inodes can't handle collapse range or insert
4685 : * range since we would need to re-encrypt blocks with a
4686 : * different IV or XTS tweak (which are based on the logical
4687 : * block number).
4688 : */
4689 1454071 : if (IS_ENCRYPTED(inode) &&
4690 0 : (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
4691 : return -EOPNOTSUPP;
4692 :
4693 : /* Return error if mode is not supported */
4694 1454071 : if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
4695 : FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
4696 : FALLOC_FL_INSERT_RANGE))
4697 : return -EOPNOTSUPP;
4698 :
4699 1454071 : inode_lock(inode);
4700 1454079 : ret = ext4_convert_inline_data(inode);
4701 1454100 : inode_unlock(inode);
4702 1454067 : if (ret)
4703 0 : goto exit;
4704 :
4705 1454067 : if (mode & FALLOC_FL_PUNCH_HOLE) {
4706 249488 : ret = ext4_punch_hole(file, offset, len);
4707 249496 : goto exit;
4708 : }
4709 :
4710 1204579 : if (mode & FALLOC_FL_COLLAPSE_RANGE) {
4711 202540 : ret = ext4_collapse_range(file, offset, len);
4712 202540 : goto exit;
4713 : }
4714 :
4715 1002039 : if (mode & FALLOC_FL_INSERT_RANGE) {
4716 146778 : ret = ext4_insert_range(file, offset, len);
4717 146779 : goto exit;
4718 : }
4719 :
4720 855261 : if (mode & FALLOC_FL_ZERO_RANGE) {
4721 203855 : ret = ext4_zero_range(file, offset, len, mode);
4722 203856 : goto exit;
4723 : }
4724 651406 : trace_ext4_fallocate_enter(inode, offset, len, mode);
4725 651382 : lblk = offset >> blkbits;
4726 :
4727 651382 : max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
4728 651382 : flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
4729 :
4730 651382 : inode_lock(inode);
4731 :
4732 : /*
4733 : * We only support preallocation for extent-based files
4734 : */
4735 651435 : if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4736 38 : ret = -EOPNOTSUPP;
4737 38 : goto out;
4738 : }
4739 :
4740 651397 : if (!(mode & FALLOC_FL_KEEP_SIZE) &&
4741 537485 : (offset + len > inode->i_size ||
4742 95871 : offset + len > EXT4_I(inode)->i_disksize)) {
4743 444216 : new_size = offset + len;
4744 444216 : ret = inode_newsize_ok(inode, new_size);
4745 444216 : if (ret)
4746 1 : goto out;
4747 : }
4748 :
4749 : /* Wait for all existing DIO workers; newcomers will block on i_rwsem */
4750 651396 : inode_dio_wait(inode);
4751 :
4752 651390 : ret = file_modified(file);
4753 651398 : if (ret)
4754 0 : goto out;
4755 :
4756 651398 : ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
4757 651397 : if (ret)
4758 8988 : goto out;
4759 :
4760 642409 : if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
4761 0 : ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
4762 0 : EXT4_I(inode)->i_sync_tid);
4763 : }
4764 642409 : out:
4765 651436 : inode_unlock(inode);
4766 651435 : trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
4767 1454103 : exit:
4768 1454103 : return ret;
4769 : }
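
The plain preallocation path (mode without any of the range-manipulation
bits) creates unwritten extents without writing zeroes; with
FALLOC_FL_KEEP_SIZE the blocks are reserved past i_size without changing the
file size. A short userspace example (the file name is arbitrary):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <linux/falloc.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("testfile", O_RDWR | O_CREAT, 0644);

		if (fd < 0)
			return 1;
		/* reserve 1 MiB of blocks but keep i_size unchanged */
		if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20))
			perror("fallocate keep-size");
		/* reserve 1 MiB and extend i_size to cover it */
		if (fallocate(fd, 0, 0, 1 << 20))
			perror("fallocate");
		close(fd);
		return 0;
	}
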
4770 :
4771 : /*
4772 : * This function converts a range of blocks to written extents.
4773 : * The caller passes the start offset and the size; all
4774 : * unwritten extents within this range will be converted to
4775 : * written extents.
4776 : *
4777 : * This function is called from the direct I/O end_io callback
4778 : * to convert the fallocated extents after I/O has completed.
4779 : * Returns 0 on success.
4780 : */
4781 1508585 : int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
4782 : loff_t offset, ssize_t len)
4783 : {
4784 1508585 : unsigned int max_blocks;
4785 1508585 : int ret = 0, ret2 = 0, ret3 = 0;
4786 1508585 : struct ext4_map_blocks map;
4787 1508585 : unsigned int blkbits = inode->i_blkbits;
4788 1508585 : unsigned int credits = 0;
4789 :
4790 1508585 : map.m_lblk = offset >> blkbits;
4791 1508585 : max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
4792 :
4793 1508585 : if (!handle) {
4794 : /*
4795 : * credits to insert 1 extent into extent tree
4796 : */
4797 411915 : credits = ext4_chunk_trans_blocks(inode, max_blocks);
4798 : }
4799 3205467 : while (ret >= 0 && ret < max_blocks) {
4800 1696901 : map.m_lblk += ret;
4801 1696901 : map.m_len = (max_blocks -= ret);
4802 1696901 : if (credits) {
4803 600203 : handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4804 : credits);
4805 600199 : if (IS_ERR(handle)) {
4806 1 : ret = PTR_ERR(handle);
4807 1 : break;
4808 : }
4809 : }
4810 1696896 : ret = ext4_map_blocks(handle, inode, &map,
4811 : EXT4_GET_BLOCKS_IO_CONVERT_EXT);
4812 1696839 : if (ret <= 0)
4813 0 : ext4_warning(inode->i_sb,
4814 : "inode #%lu: block %u: len %u: "
4815 : "ext4_ext_map_blocks returned %d",
4816 : inode->i_ino, map.m_lblk,
4817 : map.m_len, ret);
4818 1696839 : ret2 = ext4_mark_inode_dirty(handle, inode);
4819 1696884 : if (credits) {
4820 600204 : ret3 = ext4_journal_stop(handle);
4821 600204 : if (unlikely(ret3))
4822 0 : ret2 = ret3;
4823 : }
4824 :
4825 1696884 : if (ret <= 0 || ret2)
4826 : break;
4827 : }
4828 1508567 : return ret > 0 ? ret2 : ret;
4829 : }
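
/*
 * A hedged userspace illustration (not part of this file) of when the
 * conversion above runs: a direct I/O write into a preallocated region
 * completes through the end_io path, which converts the covered
 * unwritten extents to written ones. The 4096-byte alignment is an
 * assumption matching a common logical block size, and the fd is
 * assumed to have been opened with O_DIRECT.
 */
#if 0	/* illustrative userspace code */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
#include <linux/falloc.h>

static int dio_into_prealloc(int fd)
{
	void *buf;

	if (posix_memalign(&buf, 4096, 4096))
		return -1;
	/* Preallocate an unwritten extent, then overwrite part of it. */
	if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20) < 0)
		return -1;
	return (int)pwrite(fd, buf, 4096, 0);	/* buf contents irrelevant */
}
#endif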
4830 :
4831 1096706 : int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end)
4832 : {
4833 1096706 : int ret = 0, err = 0;
4834 1096706 : struct ext4_io_end_vec *io_end_vec;
4835 :
4836 : /*
4837 : * This is somewhat ugly but the idea is clear: When transaction is
4838 : * reserved, everything goes into it. Otherwise we rather start several
4839 : * smaller transactions for conversion of each extent separately.
4840 : */
4841 1096706 : if (handle) {
4842 1096685 : handle = ext4_journal_start_reserved(handle,
4843 : EXT4_HT_EXT_CONVERT);
4844 1096685 : if (IS_ERR(handle))
4845 0 : return PTR_ERR(handle);
4846 : }
4847 :
4848 2193412 : list_for_each_entry(io_end_vec, &io_end->list_vec, list) {
4849 1096706 : ret = ext4_convert_unwritten_extents(handle, io_end->inode,
4850 : io_end_vec->offset,
4851 : io_end_vec->size);
4852 1096706 : if (ret)
4853 : break;
4854 : }
4855 :
4856 1096706 : if (handle)
4857 1096685 : err = ext4_journal_stop(handle);
4858 :
4859 1096706 : return ret < 0 ? ret : err;
4860 : }
4861 :
4862 3358 : static int ext4_iomap_xattr_fiemap(struct inode *inode, struct iomap *iomap)
4863 : {
4864 3358 : __u64 physical = 0;
4865 3358 : __u64 length = 0;
4866 3358 : int blockbits = inode->i_sb->s_blocksize_bits;
4867 3358 : int error = 0;
4868 3358 : u16 iomap_type;
4869 :
4870 : /* in-inode? */
4871 3358 : if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
4872 581 : struct ext4_iloc iloc;
4873 581 : int offset; /* offset of xattr in inode */
4874 :
4875 581 : error = ext4_get_inode_loc(inode, &iloc);
4876 581 : if (error)
4877 0 : return error;
4878 581 : physical = (__u64)iloc.bh->b_blocknr << blockbits;
4879 581 : offset = EXT4_GOOD_OLD_INODE_SIZE +
4880 581 : EXT4_I(inode)->i_extra_isize;
4881 581 : physical += offset;
4882 581 : length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
4883 581 : brelse(iloc.bh);
4884 581 : iomap_type = IOMAP_INLINE;
4885 2777 : } else if (EXT4_I(inode)->i_file_acl) { /* external block */
4886 201 : physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits;
4887 201 : length = inode->i_sb->s_blocksize;
4888 201 : iomap_type = IOMAP_MAPPED;
4889 : } else {
4890 : /* no in-inode or external block for xattr, so return -ENOENT */
4891 2576 : error = -ENOENT;
4892 2576 : goto out;
4893 : }
4894 :
4895 782 : iomap->addr = physical;
4896 782 : iomap->offset = 0;
4897 782 : iomap->length = length;
4898 782 : iomap->type = iomap_type;
4899 782 : iomap->flags = 0;
4900 : out:
4901 : return error;
4902 : }
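
/*
 * Worked example for the in-inode case above: EXT4_GOOD_OLD_INODE_SIZE
 * is 128 bytes, so with 256-byte on-disk inodes and a typical
 * i_extra_isize of 32, the in-inode xattr area starts at byte 160 of
 * the inode and the reported length is 256 - 160 = 96 bytes. The
 * 256-byte inode size and i_extra_isize of 32 are common defaults, not
 * requirements.
 */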
4903 :
4904 3358 : static int ext4_iomap_xattr_begin(struct inode *inode, loff_t offset,
4905 : loff_t length, unsigned flags,
4906 : struct iomap *iomap, struct iomap *srcmap)
4907 : {
4908 3358 : int error;
4909 :
4910 3358 : error = ext4_iomap_xattr_fiemap(inode, iomap);
4911 3358 : if (error == 0 && (offset >= iomap->length))
4912 778 : error = -ENOENT;
4913 3358 : return error;
4914 : }
4915 :
4916 : static const struct iomap_ops ext4_iomap_xattr_ops = {
4917 : .iomap_begin = ext4_iomap_xattr_begin,
4918 : };
4919 :
4920 41911 : static int ext4_fiemap_check_ranges(struct inode *inode, u64 start, u64 *len)
4921 : {
4922 41911 : u64 maxbytes;
4923 :
4924 41911 : if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
4925 41894 : maxbytes = inode->i_sb->s_maxbytes;
4926 : else
4927 17 : maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
4928 :
4929 41911 : if (*len == 0)
4930 : return -EINVAL;
4931 41911 : if (start > maxbytes)
4932 : return -EFBIG;
4933 :
4934 : /*
4935 : * Shrink request scope to what the fs can actually handle.
4936 : */
4937 41911 : if (*len > maxbytes || (maxbytes - *len) < start)
4938 26449 : *len = maxbytes - start;
4939 : return 0;
4940 : }
4941 :
4942 41912 : int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4943 : u64 start, u64 len)
4944 : {
4945 41912 : int error = 0;
4946 :
4947 41912 : if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
4948 0 : error = ext4_ext_precache(inode);
4949 0 : if (error)
4950 : return error;
4951 0 : fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
4952 : }
4953 :
4954 : /*
4955 : * For bitmap files the maximum size limit could be smaller than
4956 : * s_maxbytes, so check len here manually instead of just relying on the
4957 : * generic check.
4958 : */
4959 41912 : error = ext4_fiemap_check_ranges(inode, start, &len);
4960 41910 : if (error)
4961 : return error;
4962 :
4963 41910 : if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
4964 6775 : fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR;
4965 6775 : return iomap_fiemap(inode, fieinfo, start, len,
4966 : &ext4_iomap_xattr_ops);
4967 : }
4968 :
4969 35135 : return iomap_fiemap(inode, fieinfo, start, len, &ext4_iomap_report_ops);
4970 : }
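
/*
 * A minimal userspace sketch (not part of this file) of driving the
 * FIEMAP path above through the ioctl. The extent-array size of 32 is
 * an arbitrary illustrative choice; the requested length is clamped by
 * ext4_fiemap_check_ranges() as described above.
 */
#if 0	/* illustrative userspace code */
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/fiemap.h>
#include <linux/fs.h>

static int count_mapped_extents(int fd)
{
	int n = -1;
	struct fiemap *fm = calloc(1, sizeof(*fm) +
				      32 * sizeof(struct fiemap_extent));

	if (!fm)
		return -1;
	fm->fm_start = 0;
	fm->fm_length = FIEMAP_MAX_OFFSET;
	fm->fm_extent_count = 32;
	if (ioctl(fd, FS_IOC_FIEMAP, fm) == 0)
		n = fm->fm_mapped_extents;
	free(fm);
	return n;
}
#endif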
4971 :
4972 0 : int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo,
4973 : __u64 start, __u64 len)
4974 : {
4975 0 : ext4_lblk_t start_blk, len_blks;
4976 0 : __u64 last_blk;
4977 0 : int error = 0;
4978 :
4979 0 : if (ext4_has_inline_data(inode)) {
4980 0 : int has_inline;
4981 :
4982 0 : down_read(&EXT4_I(inode)->xattr_sem);
4983 0 : has_inline = ext4_has_inline_data(inode);
4984 0 : up_read(&EXT4_I(inode)->xattr_sem);
4985 0 : if (has_inline)
4986 : return 0;
4987 : }
4988 :
4989 0 : if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
4990 0 : error = ext4_ext_precache(inode);
4991 0 : if (error)
4992 : return error;
4993 0 : fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
4994 : }
4995 :
4996 0 : error = fiemap_prep(inode, fieinfo, start, &len, 0);
4997 0 : if (error)
4998 : return error;
4999 :
5000 0 : error = ext4_fiemap_check_ranges(inode, start, &len);
5001 0 : if (error)
5002 : return error;
5003 :
5004 0 : start_blk = start >> inode->i_sb->s_blocksize_bits;
5005 0 : last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
5006 0 : if (last_blk >= EXT_MAX_BLOCKS)
5007 : last_blk = EXT_MAX_BLOCKS-1;
5008 0 : len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
5009 :
5010 : /*
5011 : * Walk the extent tree gathering extent information
5012 : * and pushing extents back to the user.
5013 : */
5014 0 : return ext4_fill_es_cache_info(inode, start_blk, len_blks, fieinfo);
5015 : }
5016 :
5017 : /*
5018 : * ext4_ext_shift_path_extents:
5019 : * Shift the extents of a path structure lying between path[depth].p_ext
5020 : * and EXT_LAST_EXTENT(path[depth].p_hdr), by @shift blocks. @SHIFT tells
5021 : * whether it is a right shift or a left shift operation.
5022 : */
5023 : static int
5024 348822 : ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
5025 : struct inode *inode, handle_t *handle,
5026 : enum SHIFT_DIRECTION SHIFT)
5027 : {
5028 348822 : int depth, err = 0;
5029 348822 : struct ext4_extent *ex_start, *ex_last;
5030 348822 : bool update = false;
5031 348822 : int credits, restart_credits;
5032 348822 : depth = path->p_depth;
5033 :
5034 359918 : while (depth >= 0) {
5035 348828 : if (depth == path->p_depth) {
5036 348822 : ex_start = path[depth].p_ext;
5037 348822 : if (!ex_start)
5038 : return -EFSCORRUPTED;
5039 :
5040 348822 : ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
5041 : /* leaf + sb + inode */
5042 348822 : credits = 3;
5043 348822 : if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) {
5044 58848 : update = true;
5045 : /* extent tree + sb + inode */
5046 58848 : credits = depth + 2;
5047 : }
5048 :
5049 348822 : restart_credits = ext4_writepage_trans_blocks(inode);
5050 348822 : err = ext4_datasem_ensure_credits(handle, inode, credits,
5051 : restart_credits, 0);
5052 348820 : if (err) {
5053 0 : if (err > 0)
5054 0 : err = -EAGAIN;
5055 0 : goto out;
5056 : }
5057 :
5058 348820 : err = ext4_ext_get_access(handle, inode, path + depth);
5059 348823 : if (err)
5060 0 : goto out;
5061 :
5062 10510733 : while (ex_start <= ex_last) {
5063 10161911 : if (SHIFT == SHIFT_LEFT) {
5064 9416711 : le32_add_cpu(&ex_start->ee_block,
5065 : -shift);
5066 : /* Try to merge to the left. */
5067 9416711 : if ((ex_start >
5068 9416711 : EXT_FIRST_EXTENT(path[depth].p_hdr))
5069 9366795 : &&
5070 9366798 : ext4_ext_try_to_merge_right(inode,
5071 : path, ex_start - 1))
5072 84 : ex_last--;
5073 : else
5074 9416624 : ex_start++;
5075 : } else {
5076 745200 : le32_add_cpu(&ex_last->ee_block, shift);
5077 745200 : ext4_ext_try_to_merge_right(inode, path,
5078 : ex_last);
5079 745202 : ex_last--;
5080 : }
5081 : }
5082 348822 : err = ext4_ext_dirty(handle, inode, path + depth);
5083 348823 : if (err)
5084 0 : goto out;
5085 :
5086 348823 : if (--depth < 0 || !update)
5087 : break;
5088 : }
5089 :
5090 : /* Update index too */
5091 51498 : err = ext4_ext_get_access(handle, inode, path + depth);
5092 51498 : if (err)
5093 0 : goto out;
5094 :
5095 51498 : if (SHIFT == SHIFT_LEFT)
5096 46184 : le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
5097 : else
5098 5314 : le32_add_cpu(&path[depth].p_idx->ei_block, shift);
5099 51498 : err = ext4_ext_dirty(handle, inode, path + depth);
5100 51498 : if (err)
5101 0 : goto out;
5102 :
5103 : /* we are done if current index is not a starting index */
5104 51498 : if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr))
5105 : break;
5106 :
5107 11096 : depth--;
5108 : }
5109 :
5110 348823 : out:
5111 : return err;
5112 : }
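
/*
 * For illustration, the core of the shift above is just an adjustment
 * of each extent's logical start; for a left shift by @shift:
 *
 *	ee_block' = ee_block - shift
 *
 * e.g. an extent covering logical blocks [100, 150) shifted left by 64
 * covers [36, 86) afterwards, while its physical blocks are untouched.
 * Right shifts add @shift instead, walking the extents from the end.
 */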
5113 :
5114 : /*
5115 : * ext4_ext_shift_extents:
5116 : * All the extents which lie in the range from @start to the last allocated
5117 : * block for the @inode are shifted either left or right (depending
5118 : * on @SHIFT) by @shift blocks.
5119 : * Returns 0 on success, error otherwise.
5120 : */
5121 : static int
5122 339244 : ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5123 : ext4_lblk_t start, ext4_lblk_t shift,
5124 : enum SHIFT_DIRECTION SHIFT)
5125 : {
5126 339244 : struct ext4_ext_path *path;
5127 339244 : int ret = 0, depth;
5128 339244 : struct ext4_extent *extent;
5129 339244 : ext4_lblk_t stop, *iterator, ex_start, ex_end;
5130 339244 : ext4_lblk_t tmp = EXT_MAX_BLOCKS;
5131 :
5132 : /* Let path point to the last extent */
5133 339244 : path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
5134 : EXT4_EX_NOCACHE);
5135 339244 : if (IS_ERR(path))
5136 0 : return PTR_ERR(path);
5137 :
5138 339244 : depth = path->p_depth;
5139 339244 : extent = path[depth].p_ext;
5140 339244 : if (!extent)
5141 10054 : goto out;
5142 :
5143 329190 : stop = le32_to_cpu(extent->ee_block);
5144 :
5145 : /*
5146 : * For left shifts, make sure the hole on the left is big enough to
5147 : * accommodate the shift. For right shifts, make sure the last extent
5148 : * won't be shifted beyond EXT_MAX_BLOCKS.
5149 : */
5150 329190 : if (SHIFT == SHIFT_LEFT) {
5151 188229 : path = ext4_find_extent(inode, start - 1, &path,
5152 : EXT4_EX_NOCACHE);
5153 188229 : if (IS_ERR(path))
5154 0 : return PTR_ERR(path);
5155 188229 : depth = path->p_depth;
5156 188229 : extent = path[depth].p_ext;
5157 188229 : if (extent) {
5158 188229 : ex_start = le32_to_cpu(extent->ee_block);
5159 188229 : ex_end = le32_to_cpu(extent->ee_block) +
5160 188229 : ext4_ext_get_actual_len(extent);
5161 : } else {
5162 : ex_start = 0;
5163 : ex_end = 0;
5164 : }
5165 :
5166 188229 : if ((start == ex_start && shift > ex_start) ||
5167 188229 : (shift > start - ex_end)) {
5168 0 : ret = -EINVAL;
5169 0 : goto out;
5170 : }
5171 : } else {
5172 140961 : if (shift > EXT_MAX_BLOCKS -
5173 140961 : (stop + ext4_ext_get_actual_len(extent))) {
5174 1 : ret = -EINVAL;
5175 1 : goto out;
5176 : }
5177 : }
5178 :
5179 : /*
5180 : * In case of a left shift, the iterator points to start and is increased
5181 : * until we reach stop. In case of a right shift, the iterator points to
5182 : * stop and is decreased until we reach start.
5183 : */
5184 140960 : again:
5185 329189 : ret = 0;
5186 329189 : if (SHIFT == SHIFT_LEFT)
5187 : iterator = &start;
5188 : else
5189 140960 : iterator = &stop;
5190 :
5191 329189 : if (tmp != EXT_MAX_BLOCKS)
5192 0 : *iterator = tmp;
5193 :
5194 : /*
5195 : * It's safe to start updating extents. start and stop are unsigned, so
5196 : * in case of a right shift, if an extent with block 0 is reached, the
5197 : * iterator becomes NULL to indicate the end of the loop.
5198 : */
5199 678012 : while (iterator && start <= stop) {
5200 348824 : path = ext4_find_extent(inode, *iterator, &path,
5201 : EXT4_EX_NOCACHE);
5202 348824 : if (IS_ERR(path))
5203 0 : return PTR_ERR(path);
5204 348824 : depth = path->p_depth;
5205 348824 : extent = path[depth].p_ext;
5206 348824 : if (!extent) {
5207 0 : EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
5208 : (unsigned long) *iterator);
5209 0 : return -EFSCORRUPTED;
5210 : }
5211 348824 : if (SHIFT == SHIFT_LEFT && *iterator >
5212 215702 : le32_to_cpu(extent->ee_block)) {
5213 : /* Hole, move to the next extent */
5214 63725 : if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
5215 63725 : path[depth].p_ext++;
5216 : } else {
5217 0 : *iterator = ext4_ext_next_allocated_block(path);
5218 0 : continue;
5219 : }
5220 : }
5221 :
5222 348824 : tmp = *iterator;
5223 348824 : if (SHIFT == SHIFT_LEFT) {
5224 215703 : extent = EXT_LAST_EXTENT(path[depth].p_hdr);
5225 215703 : *iterator = le32_to_cpu(extent->ee_block) +
5226 215703 : ext4_ext_get_actual_len(extent);
5227 : } else {
5228 133121 : extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
5229 133121 : if (le32_to_cpu(extent->ee_block) > start)
5230 159 : *iterator = le32_to_cpu(extent->ee_block) - 1;
5231 132962 : else if (le32_to_cpu(extent->ee_block) == start)
5232 : iterator = NULL;
5233 : else {
5234 124186 : extent = EXT_LAST_EXTENT(path[depth].p_hdr);
5235 768998 : while (le32_to_cpu(extent->ee_block) >= start)
5236 644812 : extent--;
5237 :
5238 124186 : if (extent == EXT_LAST_EXTENT(path[depth].p_hdr))
5239 : break;
5240 :
5241 124186 : extent++;
5242 124186 : iterator = NULL;
5243 : }
5244 133121 : path[depth].p_ext = extent;
5245 : }
5246 348824 : ret = ext4_ext_shift_path_extents(path, shift, inode,
5247 : handle, SHIFT);
5248 : /* iterator can be NULL which means we should break */
5249 348823 : if (ret == -EAGAIN)
5250 0 : goto again;
5251 348823 : if (ret)
5252 : break;
5253 : }
5254 329188 : out:
5255 339243 : ext4_free_ext_path(path);
5256 339244 : return ret;
5257 : }
5258 :
5259 : /*
5260 : * ext4_collapse_range:
5261 : * This implements fallocate's collapse-range functionality for ext4.
5262 : * Returns: 0 on success, non-zero on error.
5263 : */
5264 202540 : static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
5265 : {
5266 202540 : struct inode *inode = file_inode(file);
5267 202540 : struct super_block *sb = inode->i_sb;
5268 202540 : struct address_space *mapping = inode->i_mapping;
5269 202540 : ext4_lblk_t punch_start, punch_stop;
5270 202540 : handle_t *handle;
5271 202540 : unsigned int credits;
5272 202540 : loff_t new_size, ioffset;
5273 202540 : int ret;
5274 :
5275 : /*
5276 : * We need to test this early because xfstests assumes that a
5277 : * collapse range of (0, 1) will return EOPNOTSUPP if the file
5278 : * system does not support collapse range.
5279 : */
5280 202540 : if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
5281 : return -EOPNOTSUPP;
5282 :
5283 : /* Collapse range works only on fs cluster size aligned regions. */
5284 202531 : if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
5285 : return -EINVAL;
5286 :
5287 199392 : trace_ext4_collapse_range(inode, offset, len);
5288 :
5289 199391 : punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
5290 199391 : punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
5291 :
5292 199391 : inode_lock(inode);
5293 : /*
5294 : * The collapse range must not reach or overlap EOF; in that case the
5295 : * operation would effectively be a truncate.
5296 : */
5297 199392 : if (offset + len >= inode->i_size) {
5298 2103 : ret = -EINVAL;
5299 2103 : goto out_mutex;
5300 : }
5301 :
5302 : /* Currently just for extent based files */
5303 197289 : if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
5304 0 : ret = -EOPNOTSUPP;
5305 0 : goto out_mutex;
5306 : }
5307 :
5308 : /* Wait for existing dio to complete */
5309 197289 : inode_dio_wait(inode);
5310 :
5311 197289 : ret = file_modified(file);
5312 197289 : if (ret)
5313 0 : goto out_mutex;
5314 :
5315 : /*
5316 : * Prevent page faults from reinstantiating pages we have released from
5317 : * page cache.
5318 : */
5319 197289 : filemap_invalidate_lock(mapping);
5320 :
5321 197289 : ret = ext4_break_layouts(inode);
5322 197288 : if (ret)
5323 0 : goto out_mmap;
5324 :
5325 : /*
5326 : * The offset needs to be rounded down to a page size boundary, since
5327 : * page size may be larger than block size.
5328 : */
5329 197288 : ioffset = round_down(offset, PAGE_SIZE);
5330 : /*
5331 : * Write out the tail of the last page before the removed range since
5332 : * it will get removed from the page cache below.
5333 : */
5334 197288 : ret = filemap_write_and_wait_range(mapping, ioffset, offset);
5335 197289 : if (ret)
5336 0 : goto out_mmap;
5337 : /*
5338 : * Write out the data that will be shifted, to preserve it when
5339 : * discarding the page cache below. We are also protected from pages
5340 : * becoming dirty by i_rwsem and invalidate_lock.
5341 : */
5342 197289 : ret = filemap_write_and_wait_range(mapping, offset + len,
5343 : LLONG_MAX);
5344 197289 : if (ret)
5345 0 : goto out_mmap;
5346 197289 : truncate_pagecache(inode, ioffset);
5347 :
5348 197289 : credits = ext4_writepage_trans_blocks(inode);
5349 197289 : handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
5350 197289 : if (IS_ERR(handle)) {
5351 0 : ret = PTR_ERR(handle);
5352 0 : goto out_mmap;
5353 : }
5354 197289 : ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
5355 :
5356 197289 : down_write(&EXT4_I(inode)->i_data_sem);
5357 197289 : ext4_discard_preallocations(inode, 0);
5358 197289 : ext4_es_remove_extent(inode, punch_start, EXT_MAX_BLOCKS - punch_start);
5359 :
5360 197289 : ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
5361 197289 : if (ret) {
5362 0 : up_write(&EXT4_I(inode)->i_data_sem);
5363 0 : goto out_stop;
5364 : }
5365 197289 : ext4_discard_preallocations(inode, 0);
5366 :
5367 197289 : ret = ext4_ext_shift_extents(inode, handle, punch_stop,
5368 : punch_stop - punch_start, SHIFT_LEFT);
5369 197289 : if (ret) {
5370 0 : up_write(&EXT4_I(inode)->i_data_sem);
5371 0 : goto out_stop;
5372 : }
5373 :
5374 197289 : new_size = inode->i_size - len;
5375 197289 : i_size_write(inode, new_size);
5376 197289 : EXT4_I(inode)->i_disksize = new_size;
5377 :
5378 197289 : up_write(&EXT4_I(inode)->i_data_sem);
5379 197289 : if (IS_SYNC(inode))
5380 0 : ext4_handle_sync(handle);
5381 197289 : inode->i_mtime = inode->i_ctime = current_time(inode);
5382 197288 : ret = ext4_mark_inode_dirty(handle, inode);
5383 197289 : ext4_update_inode_fsync_trans(handle, inode, 1);
5384 :
5385 197289 : out_stop:
5386 197289 : ext4_journal_stop(handle);
5387 197289 : out_mmap:
5388 197289 : filemap_invalidate_unlock(mapping);
5389 199392 : out_mutex:
5390 199392 : inode_unlock(inode);
5391 199392 : return ret;
5392 : }
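
/*
 * A userspace sketch (not part of this file) of triggering the collapse
 * path above. The 1 MiB figure is an illustrative assumption; per the
 * checks above, offset and len must be cluster-size aligned and the
 * range must end before EOF.
 */
#if 0	/* illustrative userspace code */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>

/* Drop the first 1 MiB of the file and shift the remainder left. */
static int collapse_head(int fd)
{
	return fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 0, 1 << 20);
}
#endif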
5393 :
5394 : /*
5395 : * ext4_insert_range:
5396 : * This function implements the FALLOC_FL_INSERT_RANGE flag of fallocate.
5397 : * The data blocks starting from @offset to the EOF are shifted by @len
5398 : * towards right to create a hole in the @inode. Inode size is increased
5399 : * by len bytes.
5400 : * Returns 0 on success, error otherwise.
5401 : */
5402 146779 : static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
5403 : {
5404 146779 : struct inode *inode = file_inode(file);
5405 146779 : struct super_block *sb = inode->i_sb;
5406 146779 : struct address_space *mapping = inode->i_mapping;
5407 146779 : handle_t *handle;
5408 146779 : struct ext4_ext_path *path;
5409 146779 : struct ext4_extent *extent;
5410 146779 : ext4_lblk_t offset_lblk, len_lblk, ee_start_lblk = 0;
5411 146779 : unsigned int credits, ee_len;
5412 146779 : int ret = 0, depth, split_flag = 0;
5413 146779 : loff_t ioffset;
5414 :
5415 : /*
5416 : * We need to test this early because xfstests assumes that an
5417 : * insert range of (0, 1) will return EOPNOTSUPP if the file
5418 : * system does not support insert range.
5419 : */
5420 146779 : if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
5421 : return -EOPNOTSUPP;
5422 :
5423 : /* Insert range works only on fs cluster size aligned regions. */
5424 146773 : if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
5425 : return -EINVAL;
5426 :
5427 143588 : trace_ext4_insert_range(inode, offset, len);
5428 :
5429 143587 : offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb);
5430 143587 : len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb);
5431 :
5432 143587 : inode_lock(inode);
5433 : /* Currently just for extent based files */
5434 143587 : if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
5435 0 : ret = -EOPNOTSUPP;
5436 0 : goto out_mutex;
5437 : }
5438 :
5439 : /* Check whether the maximum file size would be exceeded */
5440 143587 : if (len > inode->i_sb->s_maxbytes - inode->i_size) {
5441 1 : ret = -EFBIG;
5442 1 : goto out_mutex;
5443 : }
5444 :
5445 : /* Offset must be less than i_size */
5446 143586 : if (offset >= inode->i_size) {
5447 1632 : ret = -EINVAL;
5448 1632 : goto out_mutex;
5449 : }
5450 :
5451 : /* Wait for existing dio to complete */
5452 141954 : inode_dio_wait(inode);
5453 :
5454 141954 : ret = file_modified(file);
5455 141955 : if (ret)
5456 0 : goto out_mutex;
5457 :
5458 : /*
5459 : * Prevent page faults from reinstantiating pages we have released from
5460 : * page cache.
5461 : */
5462 141955 : filemap_invalidate_lock(mapping);
5463 :
5464 141955 : ret = ext4_break_layouts(inode);
5465 141954 : if (ret)
5466 0 : goto out_mmap;
5467 :
5468 : /*
5469 : * The start offset needs to be rounded down to a page size boundary,
5470 : * since page size may be larger than block size.
5471 : */
5472 141954 : ioffset = round_down(offset, PAGE_SIZE);
5473 : /* Write out all dirty pages */
5474 141954 : ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
5475 : LLONG_MAX);
5476 141954 : if (ret)
5477 0 : goto out_mmap;
5478 141954 : truncate_pagecache(inode, ioffset);
5479 :
5480 141955 : credits = ext4_writepage_trans_blocks(inode);
5481 141954 : handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
5482 141954 : if (IS_ERR(handle)) {
5483 0 : ret = PTR_ERR(handle);
5484 0 : goto out_mmap;
5485 : }
5486 141954 : ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
5487 :
5488 : /* Expand file to avoid data loss if there is error while shifting */
5489 141954 : inode->i_size += len;
5490 141954 : EXT4_I(inode)->i_disksize += len;
5491 141954 : inode->i_mtime = inode->i_ctime = current_time(inode);
5492 141954 : ret = ext4_mark_inode_dirty(handle, inode);
5493 141955 : if (ret)
5494 0 : goto out_stop;
5495 :
5496 141955 : down_write(&EXT4_I(inode)->i_data_sem);
5497 141955 : ext4_discard_preallocations(inode, 0);
5498 :
5499 141955 : path = ext4_find_extent(inode, offset_lblk, NULL, 0);
5500 141955 : if (IS_ERR(path)) {
5501 0 : up_write(&EXT4_I(inode)->i_data_sem);
5502 0 : goto out_stop;
5503 : }
5504 :
5505 141955 : depth = ext_depth(inode);
5506 141955 : extent = path[depth].p_ext;
5507 141955 : if (extent) {
5508 140961 : ee_start_lblk = le32_to_cpu(extent->ee_block);
5509 140961 : ee_len = ext4_ext_get_actual_len(extent);
5510 :
5511 : /*
5512 : * If offset_lblk is not the starting block of extent, split
5513 : * the extent @offset_lblk
5514 : */
5515 140961 : if ((offset_lblk > ee_start_lblk) &&
5516 112274 : (offset_lblk < (ee_start_lblk + ee_len))) {
5517 48051 : if (ext4_ext_is_unwritten(extent))
5518 17674 : split_flag = EXT4_EXT_MARK_UNWRIT1 |
5519 : EXT4_EXT_MARK_UNWRIT2;
5520 48051 : ret = ext4_split_extent_at(handle, inode, &path,
5521 : offset_lblk, split_flag,
5522 : EXT4_EX_NOCACHE |
5523 : EXT4_GET_BLOCKS_PRE_IO |
5524 : EXT4_GET_BLOCKS_METADATA_NOFAIL);
5525 : }
5526 :
5527 140961 : ext4_free_ext_path(path);
5528 140961 : if (ret < 0) {
5529 0 : up_write(&EXT4_I(inode)->i_data_sem);
5530 0 : goto out_stop;
5531 : }
5532 : } else {
5533 994 : ext4_free_ext_path(path);
5534 : }
5535 :
5536 141955 : ext4_es_remove_extent(inode, offset_lblk, EXT_MAX_BLOCKS - offset_lblk);
5537 :
5538 : /*
5539 : * If offset_lblk lies in a hole at the start of the file, use
5540 : * ee_start_lblk to shift extents.
5541 : */
5542 141954 : ret = ext4_ext_shift_extents(inode, handle,
5543 141954 : max(ee_start_lblk, offset_lblk), len_lblk, SHIFT_RIGHT);
5544 :
5545 141955 : up_write(&EXT4_I(inode)->i_data_sem);
5546 141954 : if (IS_SYNC(inode))
5547 0 : ext4_handle_sync(handle);
5548 141954 : if (ret >= 0)
5549 141953 : ext4_update_inode_fsync_trans(handle, inode, 1);
5550 :
5551 1 : out_stop:
5552 141954 : ext4_journal_stop(handle);
5553 141954 : out_mmap:
5554 141954 : filemap_invalidate_unlock(mapping);
5555 143588 : out_mutex:
5556 143588 : inode_unlock(inode);
5557 143588 : return ret;
5558 : }
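
/*
 * The userspace counterpart (not part of this file) of the insert path
 * above. The offsets are illustrative assumptions; per the checks
 * above, offset and len must be cluster-size aligned and offset must
 * lie below i_size.
 */
#if 0	/* illustrative userspace code */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>

/* Open a 1 MiB hole at offset 4 MiB, shifting existing data right. */
static int insert_hole(int fd)
{
	return fallocate(fd, FALLOC_FL_INSERT_RANGE, 4 << 20, 1 << 20);
}
#endif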
5559 :
5560 : /**
5561 : * ext4_swap_extents() - Swap extents between two inodes
5562 : * @handle: handle for this transaction
5563 : * @inode1: First inode
5564 : * @inode2: Second inode
5565 : * @lblk1: Start block for first inode
5566 : * @lblk2: Start block for second inode
5567 : * @count: Number of blocks to swap
5568 : * @unwritten: Mark second inode's extents as unwritten after swap
5569 : * @erp: Pointer to save error value
5570 : *
5571 : * This helper routine does exactly what it promises: it swaps extents. All
5572 : * other concerns, such as page-cache locking consistency, bh mapping
5573 : * consistency and copying the extents' data, must be handled by the caller.
5574 : * Locking:
5575 : * i_rwsem is held for both inodes
5576 : * i_data_sem is locked for write for both inodes
5577 : * Assumptions:
5578 : * All pages from requested range are locked for both inodes
5579 : */
5580 : int
5581 705080 : ext4_swap_extents(handle_t *handle, struct inode *inode1,
5582 : struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
5583 : ext4_lblk_t count, int unwritten, int *erp)
5584 : {
5585 705080 : struct ext4_ext_path *path1 = NULL;
5586 705080 : struct ext4_ext_path *path2 = NULL;
5587 705080 : int replaced_count = 0;
5588 :
5589 705080 : BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
5590 705080 : BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
5591 705080 : BUG_ON(!inode_is_locked(inode1));
5592 705080 : BUG_ON(!inode_is_locked(inode2));
5593 :
5594 705080 : ext4_es_remove_extent(inode1, lblk1, count);
5595 705080 : ext4_es_remove_extent(inode2, lblk2, count);
5596 :
5597 2195935 : while (count) {
5598 1490855 : struct ext4_extent *ex1, *ex2, tmp_ex;
5599 1490855 : ext4_lblk_t e1_blk, e2_blk;
5600 1490855 : int e1_len, e2_len, len;
5601 1490855 : int split = 0;
5602 :
5603 1490855 : path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE);
5604 1490855 : if (IS_ERR(path1)) {
5605 0 : *erp = PTR_ERR(path1);
5606 0 : path1 = NULL;
5607 0 : finish:
5608 0 : count = 0;
5609 0 : goto repeat;
5610 : }
5611 1490855 : path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE);
5612 1490855 : if (IS_ERR(path2)) {
5613 0 : *erp = PTR_ERR(path2);
5614 0 : path2 = NULL;
5615 0 : goto finish;
5616 : }
5617 1490855 : ex1 = path1[path1->p_depth].p_ext;
5618 1490855 : ex2 = path2[path2->p_depth].p_ext;
5619 : /* Do we have something to swap? */
5620 1490855 : if (unlikely(!ex2 || !ex1))
5621 0 : goto finish;
5622 :
5623 1490855 : e1_blk = le32_to_cpu(ex1->ee_block);
5624 1490855 : e2_blk = le32_to_cpu(ex2->ee_block);
5625 1490855 : e1_len = ext4_ext_get_actual_len(ex1);
5626 1490855 : e2_len = ext4_ext_get_actual_len(ex2);
5627 :
5628 : /* Hole handling */
5629 1490855 : if (!in_range(lblk1, e1_blk, e1_len) ||
5630 1486451 : !in_range(lblk2, e2_blk, e2_len)) {
5631 5752 : ext4_lblk_t next1, next2;
5632 :
5633 : /* if hole after extent, then go to next extent */
5634 5752 : next1 = ext4_ext_next_allocated_block(path1);
5635 5752 : next2 = ext4_ext_next_allocated_block(path2);
5636 : /* If hole before extent, then shift to that extent */
5637 5752 : if (e1_blk > lblk1)
5638 0 : next1 = e1_blk;
5639 5752 : if (e2_blk > lblk2)
5640 4404 : next2 = e2_blk;
5641 : /* Do we have something to swap? */
5642 5752 : if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS)
5643 0 : goto finish;
5644 : /* Move to the rightest boundary */
5645 5752 : len = next1 - lblk1;
5646 5752 : if (len < next2 - lblk2)
5647 : len = next2 - lblk2;
5648 5752 : if (len > count)
5649 5752 : len = count;
5650 5752 : lblk1 += len;
5651 5752 : lblk2 += len;
5652 5752 : count -= len;
5653 5752 : goto repeat;
5654 : }
5655 :
5656 : /* Prepare left boundary */
5657 1485103 : if (e1_blk < lblk1) {
5658 93147 : split = 1;
5659 93147 : *erp = ext4_force_split_extent_at(handle, inode1,
5660 : &path1, lblk1, 0);
5661 93147 : if (unlikely(*erp))
5662 0 : goto finish;
5663 : }
5664 1485103 : if (e2_blk < lblk2) {
5665 1098 : split = 1;
5666 1098 : *erp = ext4_force_split_extent_at(handle, inode2,
5667 : &path2, lblk2, 0);
5668 1098 : if (unlikely(*erp))
5669 0 : goto finish;
5670 : }
5671 : /* ext4_split_extent_at() may result in leaf extent split,
5672 : * path must to be revalidated. */
5673 1484005 : if (split)
5674 93657 : goto repeat;
5675 :
5676 : /* Prepare right boundary */
5677 1391446 : len = count;
5678 1391446 : if (len > e1_blk + e1_len - lblk1)
5679 : len = e1_blk + e1_len - lblk1;
5680 1391446 : if (len > e2_blk + e2_len - lblk2)
5681 0 : len = e2_blk + e2_len - lblk2;
5682 :
5683 1391446 : if (len != e1_len) {
5684 684336 : split = 1;
5685 684336 : *erp = ext4_force_split_extent_at(handle, inode1,
5686 : &path1, lblk1 + len, 0);
5687 684336 : if (unlikely(*erp))
5688 0 : goto finish;
5689 : }
5690 1391446 : if (len != e2_len) {
5691 597342 : split = 1;
5692 597342 : *erp = ext4_force_split_extent_at(handle, inode2,
5693 : &path2, lblk2 + len, 0);
5694 597342 : if (*erp)
5695 0 : goto finish;
5696 : }
5697 : /* ext4_split_extent_at() may result in leaf extent split,
5698 : * path must to be revalidated. */
5699 794104 : if (split)
5700 692118 : goto repeat;
5701 :
5702 699328 : BUG_ON(e2_len != e1_len);
5703 699328 : *erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth);
5704 699328 : if (unlikely(*erp))
5705 0 : goto finish;
5706 699328 : *erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth);
5707 699328 : if (unlikely(*erp))
5708 0 : goto finish;
5709 :
5710 : /* Both extents are fully inside boundaries. Swap them now */
5711 699328 : tmp_ex = *ex1;
5712 699328 : ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2));
5713 699328 : ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex));
5714 699328 : ex1->ee_len = cpu_to_le16(e2_len);
5715 699328 : ex2->ee_len = cpu_to_le16(e1_len);
5716 699328 : if (unwritten)
5717 699328 : ext4_ext_mark_unwritten(ex2);
5718 699328 : if (ext4_ext_is_unwritten(&tmp_ex))
5719 648377 : ext4_ext_mark_unwritten(ex1);
5720 :
5721 699328 : ext4_ext_try_to_merge(handle, inode2, path2, ex2);
5722 699328 : ext4_ext_try_to_merge(handle, inode1, path1, ex1);
5723 699328 : *erp = ext4_ext_dirty(handle, inode2, path2 +
5724 : path2->p_depth);
5725 699328 : if (unlikely(*erp))
5726 0 : goto finish;
5727 699328 : *erp = ext4_ext_dirty(handle, inode1, path1 +
5728 : path1->p_depth);
5729 : /*
5730 : * Looks scary, eh? The second inode already points to the new
5731 : * blocks, and it was successfully dirtied. But luckily an error can
5732 : * happen only due to a journal error, so the full transaction will
5733 : * be aborted anyway.
5734 : */
5735 699328 : if (unlikely(*erp))
5736 0 : goto finish;
5737 699328 : lblk1 += len;
5738 699328 : lblk2 += len;
5739 699328 : replaced_count += len;
5740 699328 : count -= len;
5741 :
5742 1490855 : repeat:
5743 1490855 : ext4_free_ext_path(path1);
5744 1490855 : ext4_free_ext_path(path2);
5745 1490855 : path1 = path2 = NULL;
5746 : }
5747 705080 : return replaced_count;
5748 : }
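
/*
 * Note on callers: in mainline this helper backs online defragmentation
 * (ext4_move_extents(), reached via the EXT4_IOC_MOVE_EXT ioctl used by
 * e4defrag) as well as the EXT4_IOC_SWAP_BOOT boot-loader inode swap;
 * both set up the locking described above before calling in.
 */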
5749 :
5750 : /*
5751 : * ext4_clu_mapped - determine whether any block in a logical cluster has
5752 : * been mapped to a physical cluster
5753 : *
5754 : * @inode - file containing the logical cluster
5755 : * @lclu - logical cluster of interest
5756 : *
5757 : * Returns 1 if any block in the logical cluster is mapped, signifying
5758 : * that a physical cluster has been allocated for it. Otherwise,
5759 : * returns 0. Can also return negative error codes. Derived from
5760 : * ext4_ext_map_blocks().
5761 : */
5762 4096 : int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu)
5763 : {
5764 4096 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
5765 4096 : struct ext4_ext_path *path;
5766 4096 : int depth, mapped = 0, err = 0;
5767 4096 : struct ext4_extent *extent;
5768 4096 : ext4_lblk_t first_lblk, first_lclu, last_lclu;
5769 :
5770 : /*
5771 : * if data can be stored inline, the logical cluster isn't
5772 : * mapped - no physical clusters have been allocated, and the
5773 : * file has no extents
5774 : */
5775 4096 : if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) ||
5776 : ext4_has_inline_data(inode))
5777 : return 0;
5778 :
5779 : /* search for the extent closest to the first block in the cluster */
5780 4096 : path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0);
5781 4096 : if (IS_ERR(path)) {
5782 0 : err = PTR_ERR(path);
5783 0 : path = NULL;
5784 0 : goto out;
5785 : }
5786 :
5787 4096 : depth = ext_depth(inode);
5788 :
5789 : /*
5790 : * A consistent leaf must not be empty. This situation is possible,
5791 : * though, _during_ tree modification, and it's why an assert can't
5792 : * be put in ext4_find_extent().
5793 : */
5794 4096 : if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
5795 0 : EXT4_ERROR_INODE(inode,
5796 : "bad extent address - lblock: %lu, depth: %d, pblock: %lld",
5797 : (unsigned long) EXT4_C2B(sbi, lclu),
5798 : depth, path[depth].p_block);
5799 0 : err = -EFSCORRUPTED;
5800 0 : goto out;
5801 : }
5802 :
5803 4096 : extent = path[depth].p_ext;
5804 :
5805 : /* can't be mapped if the extent tree is empty */
5806 4096 : if (extent == NULL)
5807 3840 : goto out;
5808 :
5809 256 : first_lblk = le32_to_cpu(extent->ee_block);
5810 256 : first_lclu = EXT4_B2C(sbi, first_lblk);
5811 :
5812 : /*
5813 : * Three possible outcomes at this point - found extent spanning
5814 : * the target cluster, to the left of the target cluster, or to the
5815 : * right of the target cluster. The first two cases are handled here.
5816 : * The last case indicates the target cluster is not mapped.
5817 : */
5818 256 : if (lclu >= first_lclu) {
5819 512 : last_lclu = EXT4_B2C(sbi, first_lblk +
5820 : ext4_ext_get_actual_len(extent) - 1);
5821 256 : if (lclu <= last_lclu) {
5822 : mapped = 1;
5823 : } else {
5824 256 : first_lblk = ext4_ext_next_allocated_block(path);
5825 256 : first_lclu = EXT4_B2C(sbi, first_lblk);
5826 256 : if (lclu == first_lclu)
5827 0 : mapped = 1;
5828 : }
5829 : }
5830 :
5831 256 : out:
5832 4096 : ext4_free_ext_path(path);
5833 :
5834 4096 : return err ? err : mapped;
5835 : }
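
/*
 * Worked example of the cluster arithmetic above: with bigalloc and a
 * cluster ratio of 16 (e.g. 64 KiB clusters on 4 KiB blocks, so
 * s_cluster_bits == 4), EXT4_B2C() is a right shift by 4. An extent
 * starting at logical block 35 therefore starts in logical cluster 2,
 * and an extent covering blocks [35, 50) touches clusters 2 and 3.
 */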
5836 :
5837 : /*
5838 : * Updates the physical block address and unwritten status of the
5839 : * extent starting at logical block @start and of length @len. If such
5840 : * an extent doesn't exist, this function splits the extent tree
5841 : * appropriately to create one. This function is called in the fast
5842 : * commit replay path. Returns 0 on success and error on failure.
5843 : */
5844 0 : int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start,
5845 : int len, int unwritten, ext4_fsblk_t pblk)
5846 : {
5847 0 : struct ext4_ext_path *path = NULL, *ppath;
5848 0 : struct ext4_extent *ex;
5849 0 : int ret;
5850 :
5851 0 : path = ext4_find_extent(inode, start, NULL, 0);
5852 0 : if (IS_ERR(path))
5853 0 : return PTR_ERR(path);
5854 0 : ex = path[path->p_depth].p_ext;
5855 0 : if (!ex) {
5856 0 : ret = -EFSCORRUPTED;
5857 0 : goto out;
5858 : }
5859 :
5860 0 : if (le32_to_cpu(ex->ee_block) != start ||
5861 : ext4_ext_get_actual_len(ex) != len) {
5862 : /* We need to split this extent to match our extent first */
5863 0 : ppath = path;
5864 0 : down_write(&EXT4_I(inode)->i_data_sem);
5865 0 : ret = ext4_force_split_extent_at(NULL, inode, &ppath, start, 1);
5866 0 : up_write(&EXT4_I(inode)->i_data_sem);
5867 0 : if (ret)
5868 0 : goto out;
5869 0 : kfree(path);
5870 0 : path = ext4_find_extent(inode, start, NULL, 0);
5871 0 : if (IS_ERR(path))
5872 : return -1;
5873 0 : ppath = path;
5874 0 : ex = path[path->p_depth].p_ext;
5875 0 : WARN_ON(le32_to_cpu(ex->ee_block) != start);
5876 0 : if (ext4_ext_get_actual_len(ex) != len) {
5877 0 : down_write(&EXT4_I(inode)->i_data_sem);
5878 0 : ret = ext4_force_split_extent_at(NULL, inode, &ppath,
5879 : start + len, 1);
5880 0 : up_write(&EXT4_I(inode)->i_data_sem);
5881 0 : if (ret)
5882 0 : goto out;
5883 0 : kfree(path);
5884 0 : path = ext4_find_extent(inode, start, NULL, 0);
5885 0 : if (IS_ERR(path))
5886 : return -EINVAL;
5887 0 : ex = path[path->p_depth].p_ext;
5888 : }
5889 : }
5890 0 : if (unwritten)
5891 0 : ext4_ext_mark_unwritten(ex);
5892 : else
5893 0 : ext4_ext_mark_initialized(ex);
5894 0 : ext4_ext_store_pblock(ex, pblk);
5895 0 : down_write(&EXT4_I(inode)->i_data_sem);
5896 0 : ret = ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
5897 0 : up_write(&EXT4_I(inode)->i_data_sem);
5898 0 : out:
5899 0 : ext4_free_ext_path(path);
5900 0 : ext4_mark_inode_dirty(NULL, inode);
5901 0 : return ret;
5902 : }
5903 :
5904 : /* Try to shrink the extent tree */
5905 0 : void ext4_ext_replay_shrink_inode(struct inode *inode, ext4_lblk_t end)
5906 : {
5907 0 : struct ext4_ext_path *path = NULL;
5908 0 : struct ext4_extent *ex;
5909 0 : ext4_lblk_t old_cur, cur = 0;
5910 :
5911 0 : while (cur < end) {
5912 0 : path = ext4_find_extent(inode, cur, NULL, 0);
5913 0 : if (IS_ERR(path))
5914 : return;
5915 0 : ex = path[path->p_depth].p_ext;
5916 0 : if (!ex) {
5917 0 : ext4_free_ext_path(path);
5918 0 : ext4_mark_inode_dirty(NULL, inode);
5919 0 : return;
5920 : }
5921 0 : old_cur = cur;
5922 0 : cur = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
5923 0 : if (cur <= old_cur)
5924 0 : cur = old_cur + 1;
5925 0 : ext4_ext_try_to_merge(NULL, inode, path, ex);
5926 0 : down_write(&EXT4_I(inode)->i_data_sem);
5927 0 : ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
5928 0 : up_write(&EXT4_I(inode)->i_data_sem);
5929 0 : ext4_mark_inode_dirty(NULL, inode);
5930 0 : ext4_free_ext_path(path);
5931 : }
5932 : }
5933 :
5934 : /* Check if *cur is a hole and if it is, skip it */
5935 0 : static int skip_hole(struct inode *inode, ext4_lblk_t *cur)
5936 : {
5937 0 : int ret;
5938 0 : struct ext4_map_blocks map;
5939 :
5940 0 : map.m_lblk = *cur;
5941 0 : map.m_len = ((inode->i_size) >> inode->i_sb->s_blocksize_bits) - *cur;
5942 :
5943 0 : ret = ext4_map_blocks(NULL, inode, &map, 0);
5944 0 : if (ret < 0)
5945 : return ret;
5946 0 : if (ret != 0)
5947 : return 0;
5948 0 : *cur = *cur + map.m_len;
5949 0 : return 0;
5950 : }
5951 :
5952 : /* Count number of blocks used by this inode and update i_blocks */
5953 0 : int ext4_ext_replay_set_iblocks(struct inode *inode)
5954 : {
5955 0 : struct ext4_ext_path *path = NULL, *path2 = NULL;
5956 0 : struct ext4_extent *ex;
5957 0 : ext4_lblk_t cur = 0, end;
5958 0 : int numblks = 0, i, ret = 0;
5959 0 : ext4_fsblk_t cmp1, cmp2;
5960 0 : struct ext4_map_blocks map;
5961 :
5962 : /* Determine the size of the file first */
5963 0 : path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
5964 : EXT4_EX_NOCACHE);
5965 0 : if (IS_ERR(path))
5966 0 : return PTR_ERR(path);
5967 0 : ex = path[path->p_depth].p_ext;
5968 0 : if (!ex) {
5969 0 : ext4_free_ext_path(path);
5970 0 : goto out;
5971 : }
5972 0 : end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
5973 0 : ext4_free_ext_path(path);
5974 :
5975 : /* Count the number of data blocks */
5976 0 : cur = 0;
5977 0 : while (cur < end) {
5978 0 : map.m_lblk = cur;
5979 0 : map.m_len = end - cur;
5980 0 : ret = ext4_map_blocks(NULL, inode, &map, 0);
5981 0 : if (ret < 0)
5982 : break;
5983 0 : if (ret > 0)
5984 0 : numblks += ret;
5985 0 : cur = cur + map.m_len;
5986 : }
5987 :
5988 : /*
5989 : * Count the number of extent tree blocks. We do it by looking up
5990 : * two successive extents and determining the difference between
5991 : * their paths. When the paths differ for two successive extents,
5992 : * we compare the blocks in the path at each level and increment
5993 : * iblocks by the total number of differences found.
5994 : */
5995 0 : cur = 0;
5996 0 : ret = skip_hole(inode, &cur);
5997 0 : if (ret < 0)
5998 0 : goto out;
5999 0 : path = ext4_find_extent(inode, cur, NULL, 0);
6000 0 : if (IS_ERR(path))
6001 0 : goto out;
6002 0 : numblks += path->p_depth;
6003 0 : ext4_free_ext_path(path);
6004 0 : while (cur < end) {
6005 0 : path = ext4_find_extent(inode, cur, NULL, 0);
6006 0 : if (IS_ERR(path))
6007 : break;
6008 0 : ex = path[path->p_depth].p_ext;
6009 0 : if (!ex) {
6010 0 : ext4_free_ext_path(path);
6011 0 : return 0;
6012 : }
6013 0 : cur = max(cur + 1, le32_to_cpu(ex->ee_block) +
6014 : ext4_ext_get_actual_len(ex));
6015 0 : ret = skip_hole(inode, &cur);
6016 0 : if (ret < 0) {
6017 0 : ext4_free_ext_path(path);
6018 : break;
6019 : }
6020 0 : path2 = ext4_find_extent(inode, cur, NULL, 0);
6021 0 : if (IS_ERR(path2)) {
6022 0 : ext4_free_ext_path(path);
6023 : break;
6024 : }
6025 0 : for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) {
6026 0 : cmp1 = cmp2 = 0;
6027 0 : if (i <= path->p_depth)
6028 0 : cmp1 = path[i].p_bh ?
6029 0 : path[i].p_bh->b_blocknr : 0;
6030 0 : if (i <= path2->p_depth)
6031 0 : cmp2 = path2[i].p_bh ?
6032 0 : path2[i].p_bh->b_blocknr : 0;
6033 0 : if (cmp1 != cmp2 && cmp2 != 0)
6034 0 : numblks++;
6035 : }
6036 0 : ext4_free_ext_path(path);
6037 0 : ext4_free_ext_path(path2);
6038 : }
6039 :
6040 0 : out:
6041 0 : inode->i_blocks = numblks << (inode->i_sb->s_blocksize_bits - 9);
6042 0 : ext4_mark_inode_dirty(NULL, inode);
6043 0 : return 0;
6044 : }
6045 :
6046 0 : int ext4_ext_clear_bb(struct inode *inode)
6047 : {
6048 0 : struct ext4_ext_path *path = NULL;
6049 0 : struct ext4_extent *ex;
6050 0 : ext4_lblk_t cur = 0, end;
6051 0 : int j, ret = 0;
6052 0 : struct ext4_map_blocks map;
6053 :
6054 0 : if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
6055 : return 0;
6056 :
6057 : /* Determin the size of the file first */
6058 0 : path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
6059 : EXT4_EX_NOCACHE);
6060 0 : if (IS_ERR(path))
6061 0 : return PTR_ERR(path);
6062 0 : ex = path[path->p_depth].p_ext;
6063 0 : if (!ex) {
6064 0 : ext4_free_ext_path(path);
6065 0 : return 0;
6066 : }
6067 0 : end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
6068 0 : ext4_free_ext_path(path);
6069 :
6070 0 : cur = 0;
6071 0 : while (cur < end) {
6072 0 : map.m_lblk = cur;
6073 0 : map.m_len = end - cur;
6074 0 : ret = ext4_map_blocks(NULL, inode, &map, 0);
6075 0 : if (ret < 0)
6076 : break;
6077 0 : if (ret > 0) {
6078 0 : path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
6079 0 : if (!IS_ERR_OR_NULL(path)) {
6080 0 : for (j = 0; j < path->p_depth; j++) {
6081 :
6082 0 : ext4_mb_mark_bb(inode->i_sb,
6083 0 : path[j].p_block, 1, 0);
6084 0 : ext4_fc_record_regions(inode->i_sb, inode->i_ino,
6085 : 0, path[j].p_block, 1, 1);
6086 : }
6087 0 : ext4_free_ext_path(path);
6088 : }
6089 0 : ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
6090 0 : ext4_fc_record_regions(inode->i_sb, inode->i_ino,
6091 0 : map.m_lblk, map.m_pblk, map.m_len, 1);
6092 : }
6093 0 : cur = cur + map.m_len;
6094 : }
6095 :
6096 : return 0;
6097 : }
|