Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
4 : * Written by Alex Tomas <alex@clusterfs.com>
5 : *
6 : * Architecture independence:
7 : * Copyright (c) 2005, Bull S.A.
8 : * Written by Pierre Peiffer <pierre.peiffer@bull.net>
9 : */
10 :
11 : /*
12 : * Extents support for EXT4
13 : *
14 : * TODO:
15 : * - ext4*_error() should be used in some situations
16 : * - analyze all BUG()/BUG_ON(), use -EIO where appropriate
17 : * - smart tree reduction
18 : */
19 :
20 : #include <linux/fs.h>
21 : #include <linux/time.h>
22 : #include <linux/jbd2.h>
23 : #include <linux/highuid.h>
24 : #include <linux/pagemap.h>
25 : #include <linux/quotaops.h>
26 : #include <linux/string.h>
27 : #include <linux/slab.h>
28 : #include <linux/uaccess.h>
29 : #include <linux/fiemap.h>
30 : #include <linux/iomap.h>
31 : #include <linux/sched/mm.h>
32 : #include "ext4_jbd2.h"
33 : #include "ext4_extents.h"
34 : #include "xattr.h"
35 :
36 : #include <trace/events/ext4.h>
37 :
38 : /*
39 : * used by extent splitting.
40 : */
41 : #define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \
42 : due to ENOSPC */
43 : #define EXT4_EXT_MARK_UNWRIT1 0x2 /* mark first half unwritten */
44 : #define EXT4_EXT_MARK_UNWRIT2 0x4 /* mark second half unwritten */
45 :
46 : #define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */
47 : #define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */
48 :
49 0 : static __le32 ext4_extent_block_csum(struct inode *inode,
50 : struct ext4_extent_header *eh)
51 : {
52 0 : struct ext4_inode_info *ei = EXT4_I(inode);
53 0 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
54 0 : __u32 csum;
55 :
56 0 : csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)eh,
57 0 : EXT4_EXTENT_TAIL_OFFSET(eh));
58 0 : return cpu_to_le32(csum);
59 : }
60 :
61 0 : static int ext4_extent_block_csum_verify(struct inode *inode,
62 : struct ext4_extent_header *eh)
63 : {
64 0 : struct ext4_extent_tail *et;
65 :
66 0 : if (!ext4_has_metadata_csum(inode->i_sb))
67 : return 1;
68 :
69 0 : et = find_ext4_extent_tail(eh);
70 0 : if (et->et_checksum != ext4_extent_block_csum(inode, eh))
71 0 : return 0;
72 : return 1;
73 : }
74 :
75 0 : static void ext4_extent_block_csum_set(struct inode *inode,
76 : struct ext4_extent_header *eh)
77 : {
78 0 : struct ext4_extent_tail *et;
79 :
80 0 : if (!ext4_has_metadata_csum(inode->i_sb))
81 : return;
82 :
83 0 : et = find_ext4_extent_tail(eh);
84 0 : et->et_checksum = ext4_extent_block_csum(inode, eh);
85 : }
86 :
87 : static int ext4_split_extent_at(handle_t *handle,
88 : struct inode *inode,
89 : struct ext4_ext_path **ppath,
90 : ext4_lblk_t split,
91 : int split_flag,
92 : int flags);
93 :
94 0 : static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
95 : {
96 : /*
97 : * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
98 : * moment, get_block can be called only for blocks inside i_size since
99 : * page cache has already been dropped and writes are blocked by
100 : * i_rwsem. So we can safely drop the i_data_sem here.
101 : */
102 0 : BUG_ON(EXT4_JOURNAL(inode) == NULL);
103 0 : ext4_discard_preallocations(inode, 0);
104 0 : up_write(&EXT4_I(inode)->i_data_sem);
105 0 : *dropped = 1;
106 0 : return 0;
107 : }
108 :
109 0 : static void ext4_ext_drop_refs(struct ext4_ext_path *path)
110 : {
111 0 : int depth, i;
112 :
113 0 : if (!path)
114 : return;
115 0 : depth = path->p_depth;
116 0 : for (i = 0; i <= depth; i++, path++) {
117 0 : brelse(path->p_bh);
118 0 : path->p_bh = NULL;
119 : }
120 : }
121 :
122 0 : void ext4_free_ext_path(struct ext4_ext_path *path)
123 : {
124 0 : ext4_ext_drop_refs(path);
125 0 : kfree(path);
126 0 : }
127 :
128 : /*
129 : * Make sure 'handle' has at least 'check_cred' credits. If not, restart
130 : * the transaction with 'restart_cred' credits. The function drops i_data_sem
131 : * when restarting the transaction and re-acquires it after the transaction is restarted.
132 : *
133 : * The function returns 0 on success, 1 if transaction had to be restarted,
134 : * and < 0 in case of fatal error.
135 : */
136 0 : int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode,
137 : int check_cred, int restart_cred,
138 : int revoke_cred)
139 : {
140 0 : int ret;
141 0 : int dropped = 0;
142 :
143 0 : ret = ext4_journal_ensure_credits_fn(handle, check_cred, restart_cred,
144 : revoke_cred, ext4_ext_trunc_restart_fn(inode, &dropped));
145 0 : if (dropped)
146 0 : down_write(&EXT4_I(inode)->i_data_sem);
147 0 : return ret;
148 : }
149 :
150 : /*
151 : * could return:
152 : * - EROFS
153 : * - ENOMEM
154 : */
155 0 : static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
156 : struct ext4_ext_path *path)
157 : {
158 0 : int err = 0;
159 :
160 0 : if (path->p_bh) {
161 : /* path points to block */
162 0 : BUFFER_TRACE(path->p_bh, "get_write_access");
163 0 : err = ext4_journal_get_write_access(handle, inode->i_sb,
164 : path->p_bh, EXT4_JTR_NONE);
165 : /*
166 : * The extent buffer's verified bit will be set again in
167 : * __ext4_ext_dirty(). We could leave an inconsistent
168 : * buffer if the extent updating procedure breaks off due
169 : * to an error, so force it to be checked again.
170 : */
171 0 : if (!err)
172 0 : clear_buffer_verified(path->p_bh);
173 : }
174 : /* path points to leaf/index in inode body */
175 : /* we use in-core data, no need to protect them */
176 0 : return err;
177 : }
178 :
179 : /*
180 : * could return:
181 : * - EROFS
182 : * - ENOMEM
183 : * - EIO
184 : */
185 0 : static int __ext4_ext_dirty(const char *where, unsigned int line,
186 : handle_t *handle, struct inode *inode,
187 : struct ext4_ext_path *path)
188 : {
189 0 : int err;
190 :
191 0 : WARN_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
192 0 : if (path->p_bh) {
193 0 : ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh));
194 : /* path points to block */
195 0 : err = __ext4_handle_dirty_metadata(where, line, handle,
196 : inode, path->p_bh);
197 : /* Extents updating done, re-set verified flag */
198 0 : if (!err)
199 0 : set_buffer_verified(path->p_bh);
200 : } else {
201 : /* path points to leaf/index in inode body */
202 0 : err = ext4_mark_inode_dirty(handle, inode);
203 : }
204 0 : return err;
205 : }
206 :
207 : #define ext4_ext_dirty(handle, inode, path) \
208 : __ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
209 :
210 0 : static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
211 : struct ext4_ext_path *path,
212 : ext4_lblk_t block)
213 : {
214 0 : if (path) {
215 0 : int depth = path->p_depth;
216 0 : struct ext4_extent *ex;
217 :
218 : /*
219 : * Try to predict block placement assuming that we are
220 : * filling in a file which will eventually be
221 : * non-sparse --- i.e., in the case of libbfd writing
222 : * an ELF object's sections out-of-order but in a way
223 : * that eventually results in a contiguous object or
224 : * executable file, or some database extending a table
225 : * space file. However, this is actually somewhat
226 : * non-ideal if we are writing a sparse file such as
227 : * qemu or KVM writing a raw image file that is going
228 : * to stay fairly sparse, since it will end up
229 : * fragmenting the file system's free space. Maybe we
230 : * should have some heuristics or some way to allow
231 : * userspace to pass a hint to the file system,
232 : * especially if the latter case turns out to be
233 : * common.
234 : */
235 0 : ex = path[depth].p_ext;
236 0 : if (ex) {
237 0 : ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
238 0 : ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);
239 :
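/* extrapolate the goal from the closest extent found in the path,
 * keeping the same distance between logical and physical blocks */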
240 0 : if (block > ext_block)
241 0 : return ext_pblk + (block - ext_block);
242 : else
243 0 : return ext_pblk - (ext_block - block);
244 : }
245 :
246 : /* it looks like index is empty;
247 : * try to find starting block from index itself */
248 0 : if (path[depth].p_bh)
249 0 : return path[depth].p_bh->b_blocknr;
250 : }
251 :
252 : /* OK. use inode's group */
253 0 : return ext4_inode_to_goal_block(inode);
254 : }
255 :
256 : /*
257 : * Allocation for a meta data block
258 : */
259 : static ext4_fsblk_t
260 0 : ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
261 : struct ext4_ext_path *path,
262 : struct ext4_extent *ex, int *err, unsigned int flags)
263 : {
264 0 : ext4_fsblk_t goal, newblock;
265 :
266 0 : goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
267 0 : newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
268 : NULL, err);
269 0 : return newblock;
270 : }
271 :
272 : static inline int ext4_ext_space_block(struct inode *inode, int check)
273 : {
274 0 : int size;
275 :
276 0 : size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
277 0 : / sizeof(struct ext4_extent);
278 : #ifdef AGGRESSIVE_TEST
279 : if (!check && size > 6)
280 : size = 6;
281 : #endif
282 0 : return size;
283 : }
284 :
285 : static inline int ext4_ext_space_block_idx(struct inode *inode, int check)
286 : {
287 0 : int size;
288 :
289 0 : size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
290 0 : / sizeof(struct ext4_extent_idx);
291 : #ifdef AGGRESSIVE_TEST
292 : if (!check && size > 5)
293 : size = 5;
294 : #endif
295 0 : return size;
296 : }
297 :
298 : static inline int ext4_ext_space_root(struct inode *inode, int check)
299 : {
300 : int size;
301 :
302 : size = sizeof(EXT4_I(inode)->i_data);
303 : size -= sizeof(struct ext4_extent_header);
304 : size /= sizeof(struct ext4_extent);
305 : #ifdef AGGRESSIVE_TEST
306 : if (!check && size > 3)
307 : size = 3;
308 : #endif
309 : return size;
310 : }
311 :
312 : static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
313 : {
314 : int size;
315 :
316 : size = sizeof(EXT4_I(inode)->i_data);
317 : size -= sizeof(struct ext4_extent_header);
318 : size /= sizeof(struct ext4_extent_idx);
319 : #ifdef AGGRESSIVE_TEST
320 : if (!check && size > 4)
321 : size = 4;
322 : #endif
323 : return size;
324 : }
325 :
326 : static inline int
327 0 : ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
328 : struct ext4_ext_path **ppath, ext4_lblk_t lblk,
329 : int nofail)
330 : {
331 0 : struct ext4_ext_path *path = *ppath;
332 0 : int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);
333 0 : int flags = EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO;
334 :
335 0 : if (nofail)
336 0 : flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL | EXT4_EX_NOFAIL;
337 :
338 0 : return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ?
339 : EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0,
340 : flags);
341 : }
342 :
343 : static int
344 0 : ext4_ext_max_entries(struct inode *inode, int depth)
345 : {
346 0 : int max;
347 :
348 0 : if (depth == ext_depth(inode)) {
349 : if (depth == 0)
350 : max = ext4_ext_space_root(inode, 1);
351 : else
352 : max = ext4_ext_space_root_idx(inode, 1);
353 : } else {
354 0 : if (depth == 0)
355 0 : max = ext4_ext_space_block(inode, 1);
356 : else
357 0 : max = ext4_ext_space_block_idx(inode, 1);
358 : }
359 :
360 0 : return max;
361 : }
362 :
363 0 : static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
364 : {
365 0 : ext4_fsblk_t block = ext4_ext_pblock(ext);
366 0 : int len = ext4_ext_get_actual_len(ext);
367 0 : ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
368 :
369 : /*
370 : * We allow neither:
371 : * - zero length
372 : * - overflow/wrap-around
373 : */
374 0 : if (lblock + len <= lblock)
375 : return 0;
376 0 : return ext4_inode_block_valid(inode, block, len);
377 : }
378 :
379 0 : static int ext4_valid_extent_idx(struct inode *inode,
380 : struct ext4_extent_idx *ext_idx)
381 : {
382 0 : ext4_fsblk_t block = ext4_idx_pblock(ext_idx);
383 :
384 0 : return ext4_inode_block_valid(inode, block, 1);
385 : }
386 :
387 0 : static int ext4_valid_extent_entries(struct inode *inode,
388 : struct ext4_extent_header *eh,
389 : ext4_lblk_t lblk, ext4_fsblk_t *pblk,
390 : int depth)
391 : {
392 0 : unsigned short entries;
393 0 : ext4_lblk_t lblock = 0;
394 0 : ext4_lblk_t cur = 0;
395 :
396 0 : if (eh->eh_entries == 0)
397 : return 1;
398 :
399 0 : entries = le16_to_cpu(eh->eh_entries);
400 :
401 0 : if (depth == 0) {
402 : /* leaf entries */
403 0 : struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
404 :
405 : /*
406 : * The logical block in the first entry should be equal to
407 : * the number in the index block.
408 : */
409 0 : if (depth != ext_depth(inode) &&
410 0 : lblk != le32_to_cpu(ext->ee_block))
411 : return 0;
412 0 : while (entries) {
413 0 : if (!ext4_valid_extent(inode, ext))
414 : return 0;
415 :
416 : /* Check for overlapping extents */
417 0 : lblock = le32_to_cpu(ext->ee_block);
418 0 : if (lblock < cur) {
419 0 : *pblk = ext4_ext_pblock(ext);
420 0 : return 0;
421 : }
422 0 : cur = lblock + ext4_ext_get_actual_len(ext);
423 0 : ext++;
424 0 : entries--;
425 : }
426 : } else {
427 0 : struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
428 :
429 : /*
430 : * The logical block in the first entry should be equal to
431 : * the number in the parent index block.
432 : */
433 0 : if (depth != ext_depth(inode) &&
434 0 : lblk != le32_to_cpu(ext_idx->ei_block))
435 : return 0;
436 0 : while (entries) {
437 0 : if (!ext4_valid_extent_idx(inode, ext_idx))
438 : return 0;
439 :
440 : /* Check for overlapping index extents */
441 0 : lblock = le32_to_cpu(ext_idx->ei_block);
442 0 : if (lblock < cur) {
443 0 : *pblk = ext4_idx_pblock(ext_idx);
444 0 : return 0;
445 : }
446 0 : ext_idx++;
447 0 : entries--;
448 0 : cur = lblock + 1;
449 : }
450 : }
451 : return 1;
452 : }
453 :
454 0 : static int __ext4_ext_check(const char *function, unsigned int line,
455 : struct inode *inode, struct ext4_extent_header *eh,
456 : int depth, ext4_fsblk_t pblk, ext4_lblk_t lblk)
457 : {
458 0 : const char *error_msg;
459 0 : int max = 0, err = -EFSCORRUPTED;
460 :
461 0 : if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
462 0 : error_msg = "invalid magic";
463 0 : goto corrupted;
464 : }
465 0 : if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) {
466 0 : error_msg = "unexpected eh_depth";
467 0 : goto corrupted;
468 : }
469 0 : if (unlikely(eh->eh_max == 0)) {
470 0 : error_msg = "invalid eh_max";
471 0 : goto corrupted;
472 : }
473 0 : max = ext4_ext_max_entries(inode, depth);
474 0 : if (unlikely(le16_to_cpu(eh->eh_max) > max)) {
475 0 : error_msg = "too large eh_max";
476 0 : goto corrupted;
477 : }
478 0 : if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) {
479 0 : error_msg = "invalid eh_entries";
480 0 : goto corrupted;
481 : }
482 0 : if (unlikely((eh->eh_entries == 0) && (depth > 0))) {
483 0 : error_msg = "eh_entries is 0 but eh_depth is > 0";
484 0 : goto corrupted;
485 : }
486 0 : if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) {
487 0 : error_msg = "invalid extent entries";
488 0 : goto corrupted;
489 : }
490 0 : if (unlikely(depth > 32)) {
491 0 : error_msg = "too large eh_depth";
492 0 : goto corrupted;
493 : }
494 : /* Verify checksum on non-root extent tree nodes */
495 0 : if (ext_depth(inode) != depth &&
496 0 : !ext4_extent_block_csum_verify(inode, eh)) {
497 0 : error_msg = "extent tree corrupted";
498 0 : err = -EFSBADCRC;
499 0 : goto corrupted;
500 : }
501 : return 0;
502 :
503 0 : corrupted:
504 0 : ext4_error_inode_err(inode, function, line, 0, -err,
505 : "pblk %llu bad header/extent: %s - magic %x, "
506 : "entries %u, max %u(%u), depth %u(%u)",
507 : (unsigned long long) pblk, error_msg,
508 : le16_to_cpu(eh->eh_magic),
509 : le16_to_cpu(eh->eh_entries),
510 : le16_to_cpu(eh->eh_max),
511 : max, le16_to_cpu(eh->eh_depth), depth);
512 0 : return err;
513 : }
514 :
515 : #define ext4_ext_check(inode, eh, depth, pblk) \
516 : __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk), 0)
517 :
518 0 : int ext4_ext_check_inode(struct inode *inode)
519 : {
520 0 : return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0);
521 : }
522 :
523 0 : static void ext4_cache_extents(struct inode *inode,
524 : struct ext4_extent_header *eh)
525 : {
526 0 : struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
527 0 : ext4_lblk_t prev = 0;
528 0 : int i;
529 :
530 0 : for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
531 0 : unsigned int status = EXTENT_STATUS_WRITTEN;
532 0 : ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
533 0 : int len = ext4_ext_get_actual_len(ex);
534 :
535 0 : if (prev && (prev != lblk))
536 0 : ext4_es_cache_extent(inode, prev, lblk - prev, ~0,
537 : EXTENT_STATUS_HOLE);
538 :
539 0 : if (ext4_ext_is_unwritten(ex))
540 0 : status = EXTENT_STATUS_UNWRITTEN;
541 0 : ext4_es_cache_extent(inode, lblk, len,
542 : ext4_ext_pblock(ex), status);
543 0 : prev = lblk + len;
544 : }
545 0 : }
546 :
547 : static struct buffer_head *
548 0 : __read_extent_tree_block(const char *function, unsigned int line,
549 : struct inode *inode, struct ext4_extent_idx *idx,
550 : int depth, int flags)
551 : {
552 0 : struct buffer_head *bh;
553 0 : int err;
554 0 : gfp_t gfp_flags = __GFP_MOVABLE | GFP_NOFS;
555 0 : ext4_fsblk_t pblk;
556 :
557 0 : if (flags & EXT4_EX_NOFAIL)
558 0 : gfp_flags |= __GFP_NOFAIL;
559 :
560 0 : pblk = ext4_idx_pblock(idx);
561 0 : bh = sb_getblk_gfp(inode->i_sb, pblk, gfp_flags);
562 0 : if (unlikely(!bh))
563 : return ERR_PTR(-ENOMEM);
564 :
565 0 : if (!bh_uptodate_or_lock(bh)) {
566 0 : trace_ext4_ext_load_extent(inode, pblk, _RET_IP_);
567 0 : err = ext4_read_bh(bh, 0, NULL);
568 0 : if (err < 0)
569 0 : goto errout;
570 : }
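/* a previously verified buffer can be returned right away, unless the
 * caller asked us to (re)cache its extents in the extent status tree */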
571 0 : if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE))
572 : return bh;
573 0 : err = __ext4_ext_check(function, line, inode, ext_block_hdr(bh),
574 0 : depth, pblk, le32_to_cpu(idx->ei_block));
575 0 : if (err)
576 0 : goto errout;
577 0 : set_buffer_verified(bh);
578 : /*
579 : * If this is a leaf block, cache all of its entries
580 : */
581 0 : if (!(flags & EXT4_EX_NOCACHE) && depth == 0) {
582 0 : struct ext4_extent_header *eh = ext_block_hdr(bh);
583 0 : ext4_cache_extents(inode, eh);
584 : }
585 : return bh;
586 0 : errout:
587 0 : put_bh(bh);
588 0 : return ERR_PTR(err);
589 :
590 : }
591 :
592 : #define read_extent_tree_block(inode, idx, depth, flags) \
593 : __read_extent_tree_block(__func__, __LINE__, (inode), (idx), \
594 : (depth), (flags))
595 :
596 : /*
597 : * This function is called to cache a file's extent information in the
598 : * extent status tree
599 : */
600 0 : int ext4_ext_precache(struct inode *inode)
601 : {
602 0 : struct ext4_inode_info *ei = EXT4_I(inode);
603 0 : struct ext4_ext_path *path = NULL;
604 0 : struct buffer_head *bh;
605 0 : int i = 0, depth, ret = 0;
606 :
607 0 : if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
608 : return 0; /* not an extent-mapped inode */
609 :
610 0 : down_read(&ei->i_data_sem);
611 0 : depth = ext_depth(inode);
612 :
613 : /* Don't cache anything if there are no external extent blocks */
614 0 : if (!depth) {
615 0 : up_read(&ei->i_data_sem);
616 0 : return ret;
617 : }
618 :
619 0 : path = kcalloc(depth + 1, sizeof(struct ext4_ext_path),
620 : GFP_NOFS);
621 0 : if (path == NULL) {
622 0 : up_read(&ei->i_data_sem);
623 0 : return -ENOMEM;
624 : }
625 :
626 0 : path[0].p_hdr = ext_inode_hdr(inode);
627 0 : ret = ext4_ext_check(inode, path[0].p_hdr, depth, 0);
628 0 : if (ret)
629 0 : goto out;
630 0 : path[0].p_idx = EXT_FIRST_INDEX(path[0].p_hdr);
631 0 : while (i >= 0) {
632 : /*
633 : * If this is a leaf block or we've reached the end of
634 : * the index block, go up
635 : */
636 0 : if ((i == depth) ||
637 0 : path[i].p_idx > EXT_LAST_INDEX(path[i].p_hdr)) {
638 0 : brelse(path[i].p_bh);
639 0 : path[i].p_bh = NULL;
640 0 : i--;
641 0 : continue;
642 : }
643 0 : bh = read_extent_tree_block(inode, path[i].p_idx++,
644 : depth - i - 1,
645 : EXT4_EX_FORCE_CACHE);
646 0 : if (IS_ERR(bh)) {
647 0 : ret = PTR_ERR(bh);
648 0 : break;
649 : }
650 0 : i++;
651 0 : path[i].p_bh = bh;
652 0 : path[i].p_hdr = ext_block_hdr(bh);
653 0 : path[i].p_idx = EXT_FIRST_INDEX(path[i].p_hdr);
654 : }
655 0 : ext4_set_inode_state(inode, EXT4_STATE_EXT_PRECACHED);
656 0 : out:
657 0 : up_read(&ei->i_data_sem);
658 0 : ext4_free_ext_path(path);
659 0 : return ret;
660 : }
661 :
662 : #ifdef EXT_DEBUG
663 : static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
664 : {
665 : int k, l = path->p_depth;
666 :
667 : ext_debug(inode, "path:");
668 : for (k = 0; k <= l; k++, path++) {
669 : if (path->p_idx) {
670 : ext_debug(inode, " %d->%llu",
671 : le32_to_cpu(path->p_idx->ei_block),
672 : ext4_idx_pblock(path->p_idx));
673 : } else if (path->p_ext) {
674 : ext_debug(inode, " %d:[%d]%d:%llu ",
675 : le32_to_cpu(path->p_ext->ee_block),
676 : ext4_ext_is_unwritten(path->p_ext),
677 : ext4_ext_get_actual_len(path->p_ext),
678 : ext4_ext_pblock(path->p_ext));
679 : } else
680 : ext_debug(inode, " []");
681 : }
682 : ext_debug(inode, "\n");
683 : }
684 :
685 : static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
686 : {
687 : int depth = ext_depth(inode);
688 : struct ext4_extent_header *eh;
689 : struct ext4_extent *ex;
690 : int i;
691 :
692 : if (!path)
693 : return;
694 :
695 : eh = path[depth].p_hdr;
696 : ex = EXT_FIRST_EXTENT(eh);
697 :
698 : ext_debug(inode, "Displaying leaf extents\n");
699 :
700 : for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
701 : ext_debug(inode, "%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
702 : ext4_ext_is_unwritten(ex),
703 : ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
704 : }
705 : ext_debug(inode, "\n");
706 : }
707 :
708 : static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
709 : ext4_fsblk_t newblock, int level)
710 : {
711 : int depth = ext_depth(inode);
712 : struct ext4_extent *ex;
713 :
714 : if (depth != level) {
715 : struct ext4_extent_idx *idx;
716 : idx = path[level].p_idx;
717 : while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) {
718 : ext_debug(inode, "%d: move %d:%llu in new index %llu\n",
719 : level, le32_to_cpu(idx->ei_block),
720 : ext4_idx_pblock(idx), newblock);
721 : idx++;
722 : }
723 :
724 : return;
725 : }
726 :
727 : ex = path[depth].p_ext;
728 : while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) {
729 : ext_debug(inode, "move %d:%llu:[%d]%d in new leaf %llu\n",
730 : le32_to_cpu(ex->ee_block),
731 : ext4_ext_pblock(ex),
732 : ext4_ext_is_unwritten(ex),
733 : ext4_ext_get_actual_len(ex),
734 : newblock);
735 : ex++;
736 : }
737 : }
738 :
739 : #else
740 : #define ext4_ext_show_path(inode, path)
741 : #define ext4_ext_show_leaf(inode, path)
742 : #define ext4_ext_show_move(inode, path, newblock, level)
743 : #endif
744 :
745 : /*
746 : * ext4_ext_binsearch_idx:
747 : * binary search for the closest index of the given block
748 : * the header must be checked before calling this
749 : */
750 : static void
751 0 : ext4_ext_binsearch_idx(struct inode *inode,
752 : struct ext4_ext_path *path, ext4_lblk_t block)
753 : {
754 0 : struct ext4_extent_header *eh = path->p_hdr;
755 0 : struct ext4_extent_idx *r, *l, *m;
756 :
757 :
758 0 : ext_debug(inode, "binsearch for %u(idx): ", block);
759 :
760 0 : l = EXT_FIRST_INDEX(eh) + 1;
761 0 : r = EXT_LAST_INDEX(eh);
762 0 : while (l <= r) {
763 0 : m = l + (r - l) / 2;
764 0 : ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
765 : le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block),
766 : r, le32_to_cpu(r->ei_block));
767 :
768 0 : if (block < le32_to_cpu(m->ei_block))
769 0 : r = m - 1;
770 : else
771 0 : l = m + 1;
772 : }
773 :
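/* the loop leaves l at the first index with ei_block > block,
 * so the index covering 'block' is the one right before it */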
774 0 : path->p_idx = l - 1;
775 0 : ext_debug(inode, " -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block),
776 : ext4_idx_pblock(path->p_idx));
777 :
778 : #ifdef CHECK_BINSEARCH
779 : {
780 : struct ext4_extent_idx *chix, *ix;
781 : int k;
782 :
783 : chix = ix = EXT_FIRST_INDEX(eh);
784 : for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
785 : if (k != 0 && le32_to_cpu(ix->ei_block) <=
786 : le32_to_cpu(ix[-1].ei_block)) {
787 : printk(KERN_DEBUG "k=%d, ix=0x%p, "
788 : "first=0x%p\n", k,
789 : ix, EXT_FIRST_INDEX(eh));
790 : printk(KERN_DEBUG "%u <= %u\n",
791 : le32_to_cpu(ix->ei_block),
792 : le32_to_cpu(ix[-1].ei_block));
793 : }
794 : BUG_ON(k && le32_to_cpu(ix->ei_block)
795 : <= le32_to_cpu(ix[-1].ei_block));
796 : if (block < le32_to_cpu(ix->ei_block))
797 : break;
798 : chix = ix;
799 : }
800 : BUG_ON(chix != path->p_idx);
801 : }
802 : #endif
803 :
804 0 : }
805 :
806 : /*
807 : * ext4_ext_binsearch:
808 : * binary search for closest extent of the given block
809 : * the header must be checked before calling this
810 : */
811 : static void
812 0 : ext4_ext_binsearch(struct inode *inode,
813 : struct ext4_ext_path *path, ext4_lblk_t block)
814 : {
815 0 : struct ext4_extent_header *eh = path->p_hdr;
816 0 : struct ext4_extent *r, *l, *m;
817 :
818 0 : if (eh->eh_entries == 0) {
819 : /*
820 : * this leaf is empty:
821 : * we get such a leaf in split/add case
822 : */
823 : return;
824 : }
825 :
826 0 : ext_debug(inode, "binsearch for %u: ", block);
827 :
828 0 : l = EXT_FIRST_EXTENT(eh) + 1;
829 0 : r = EXT_LAST_EXTENT(eh);
830 :
831 0 : while (l <= r) {
832 0 : m = l + (r - l) / 2;
833 0 : ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
834 : le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block),
835 : r, le32_to_cpu(r->ee_block));
836 :
837 0 : if (block < le32_to_cpu(m->ee_block))
838 0 : r = m - 1;
839 : else
840 0 : l = m + 1;
841 : }
842 :
843 0 : path->p_ext = l - 1;
844 0 : ext_debug(inode, " -> %d:%llu:[%d]%d ",
845 : le32_to_cpu(path->p_ext->ee_block),
846 : ext4_ext_pblock(path->p_ext),
847 : ext4_ext_is_unwritten(path->p_ext),
848 : ext4_ext_get_actual_len(path->p_ext));
849 :
850 : #ifdef CHECK_BINSEARCH
851 : {
852 : struct ext4_extent *chex, *ex;
853 : int k;
854 :
855 : chex = ex = EXT_FIRST_EXTENT(eh);
856 : for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
857 : BUG_ON(k && le32_to_cpu(ex->ee_block)
858 : <= le32_to_cpu(ex[-1].ee_block));
859 : if (block < le32_to_cpu(ex->ee_block))
860 : break;
861 : chex = ex;
862 : }
863 : BUG_ON(chex != path->p_ext);
864 : }
865 : #endif
866 :
867 : }
868 :
869 0 : void ext4_ext_tree_init(handle_t *handle, struct inode *inode)
870 : {
871 0 : struct ext4_extent_header *eh;
872 :
873 0 : eh = ext_inode_hdr(inode);
874 0 : eh->eh_depth = 0;
875 0 : eh->eh_entries = 0;
876 0 : eh->eh_magic = EXT4_EXT_MAGIC;
877 0 : eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
878 0 : eh->eh_generation = 0;
879 0 : ext4_mark_inode_dirty(handle, inode);
880 0 : }
881 :
882 : struct ext4_ext_path *
883 0 : ext4_find_extent(struct inode *inode, ext4_lblk_t block,
884 : struct ext4_ext_path **orig_path, int flags)
885 : {
886 0 : struct ext4_extent_header *eh;
887 0 : struct buffer_head *bh;
888 0 : struct ext4_ext_path *path = orig_path ? *orig_path : NULL;
889 0 : short int depth, i, ppos = 0;
890 0 : int ret;
891 0 : gfp_t gfp_flags = GFP_NOFS;
892 :
893 0 : if (flags & EXT4_EX_NOFAIL)
894 0 : gfp_flags |= __GFP_NOFAIL;
895 :
896 0 : eh = ext_inode_hdr(inode);
897 0 : depth = ext_depth(inode);
898 0 : if (depth < 0 || depth > EXT4_MAX_EXTENT_DEPTH) {
899 0 : EXT4_ERROR_INODE(inode, "inode has invalid extent depth: %d",
900 : depth);
901 0 : ret = -EFSCORRUPTED;
902 0 : goto err;
903 : }
904 :
905 0 : if (path) {
906 0 : ext4_ext_drop_refs(path);
907 0 : if (depth > path[0].p_maxdepth) {
908 0 : kfree(path);
909 0 : *orig_path = path = NULL;
910 : }
911 : }
912 0 : if (!path) {
913 : /* account possible depth increase */
914 0 : path = kcalloc(depth + 2, sizeof(struct ext4_ext_path),
915 : gfp_flags);
916 0 : if (unlikely(!path))
917 : return ERR_PTR(-ENOMEM);
918 0 : path[0].p_maxdepth = depth + 1;
919 : }
920 0 : path[0].p_hdr = eh;
921 0 : path[0].p_bh = NULL;
922 :
923 0 : i = depth;
924 0 : if (!(flags & EXT4_EX_NOCACHE) && depth == 0)
925 0 : ext4_cache_extents(inode, eh);
926 : /* walk through the tree */
927 0 : while (i) {
928 0 : ext_debug(inode, "depth %d: num %d, max %d\n",
929 : ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
930 :
931 0 : ext4_ext_binsearch_idx(inode, path + ppos, block);
932 0 : path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
933 0 : path[ppos].p_depth = i;
934 0 : path[ppos].p_ext = NULL;
935 :
936 0 : bh = read_extent_tree_block(inode, path[ppos].p_idx, --i, flags);
937 0 : if (IS_ERR(bh)) {
938 0 : ret = PTR_ERR(bh);
939 0 : goto err;
940 : }
941 :
942 0 : eh = ext_block_hdr(bh);
943 0 : ppos++;
944 0 : path[ppos].p_bh = bh;
945 0 : path[ppos].p_hdr = eh;
946 : }
947 :
948 0 : path[ppos].p_depth = i;
949 0 : path[ppos].p_ext = NULL;
950 0 : path[ppos].p_idx = NULL;
951 :
952 : /* find extent */
953 0 : ext4_ext_binsearch(inode, path + ppos, block);
954 : /* if not an empty leaf */
955 0 : if (path[ppos].p_ext)
956 0 : path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
957 :
958 : ext4_ext_show_path(inode, path);
959 :
960 : return path;
961 :
962 0 : err:
963 0 : ext4_free_ext_path(path);
964 0 : if (orig_path)
965 0 : *orig_path = NULL;
966 0 : return ERR_PTR(ret);
967 : }
968 :
969 : /*
970 : * ext4_ext_insert_index:
971 : * insert new index [@logical;@ptr] into the block at @curp;
972 : * check where to insert: before @curp or after @curp
973 : */
974 0 : static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
975 : struct ext4_ext_path *curp,
976 : int logical, ext4_fsblk_t ptr)
977 : {
978 0 : struct ext4_extent_idx *ix;
979 0 : int len, err;
980 :
981 0 : err = ext4_ext_get_access(handle, inode, curp);
982 0 : if (err)
983 : return err;
984 :
985 0 : if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
986 0 : EXT4_ERROR_INODE(inode,
987 : "logical %d == ei_block %d!",
988 : logical, le32_to_cpu(curp->p_idx->ei_block));
989 0 : return -EFSCORRUPTED;
990 : }
991 :
992 0 : if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
993 : >= le16_to_cpu(curp->p_hdr->eh_max))) {
994 0 : EXT4_ERROR_INODE(inode,
995 : "eh_entries %d >= eh_max %d!",
996 : le16_to_cpu(curp->p_hdr->eh_entries),
997 : le16_to_cpu(curp->p_hdr->eh_max));
998 0 : return -EFSCORRUPTED;
999 : }
1000 :
1001 0 : if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
1002 : /* insert after */
1003 0 : ext_debug(inode, "insert new index %d after: %llu\n",
1004 : logical, ptr);
1005 0 : ix = curp->p_idx + 1;
1006 : } else {
1007 : /* insert before */
1008 : ext_debug(inode, "insert new index %d before: %llu\n",
1009 : logical, ptr);
1010 : ix = curp->p_idx;
1011 : }
1012 :
1013 0 : len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
1014 0 : BUG_ON(len < 0);
1015 0 : if (len > 0) {
1016 0 : ext_debug(inode, "insert new index %d: "
1017 : "move %d indices from 0x%p to 0x%p\n",
1018 : logical, len, ix, ix + 1);
1019 0 : memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
1020 : }
1021 :
1022 0 : if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
1023 0 : EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
1024 0 : return -EFSCORRUPTED;
1025 : }
1026 :
1027 0 : ix->ei_block = cpu_to_le32(logical);
1028 0 : ext4_idx_store_pblock(ix, ptr);
1029 0 : le16_add_cpu(&curp->p_hdr->eh_entries, 1);
1030 :
1031 0 : if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
1032 0 : EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
1033 0 : return -EFSCORRUPTED;
1034 : }
1035 :
1036 0 : err = ext4_ext_dirty(handle, inode, curp);
1037 0 : ext4_std_error(inode->i_sb, err);
1038 :
1039 : return err;
1040 : }
1041 :
1042 : /*
1043 : * ext4_ext_split:
1044 : * inserts new subtree into the path, using free index entry
1045 : * at depth @at:
1046 : * - allocates all needed blocks (new leaf and all intermediate index blocks)
1047 : * - makes decision where to split
1048 : * - moves remaining extents and index entries (right to the split point)
1049 : * into the newly allocated blocks
1050 : * - initializes subtree
1051 : */
1052 0 : static int ext4_ext_split(handle_t *handle, struct inode *inode,
1053 : unsigned int flags,
1054 : struct ext4_ext_path *path,
1055 : struct ext4_extent *newext, int at)
1056 : {
1057 0 : struct buffer_head *bh = NULL;
1058 0 : int depth = ext_depth(inode);
1059 0 : struct ext4_extent_header *neh;
1060 0 : struct ext4_extent_idx *fidx;
1061 0 : int i = at, k, m, a;
1062 0 : ext4_fsblk_t newblock, oldblock;
1063 0 : __le32 border;
1064 0 : ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
1065 0 : gfp_t gfp_flags = GFP_NOFS;
1066 0 : int err = 0;
1067 0 : size_t ext_size = 0;
1068 :
1069 0 : if (flags & EXT4_EX_NOFAIL)
1070 0 : gfp_flags |= __GFP_NOFAIL;
1071 :
1072 : /* make decision: where to split? */
1073 : /* FIXME: now decision is simplest: at current extent */
1074 :
1075 : /* if current leaf will be split, then we should use
1076 : * border from split point */
1077 0 : if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
1078 0 : EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
1079 0 : return -EFSCORRUPTED;
1080 : }
1081 0 : if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
1082 0 : border = path[depth].p_ext[1].ee_block;
1083 0 : ext_debug(inode, "leaf will be split."
1084 : " next leaf starts at %d\n",
1085 : le32_to_cpu(border));
1086 : } else {
1087 0 : border = newext->ee_block;
1088 0 : ext_debug(inode, "leaf will be added."
1089 : " next leaf starts at %d\n",
1090 : le32_to_cpu(border));
1091 : }
1092 :
1093 : /*
1094 : * If an error occurs, we stop processing and mark the
1095 : * filesystem read-only. The index won't be inserted and
1096 : * the tree will stay in a consistent state. The next
1097 : * mount will repair the buffers too.
1098 : */
1099 :
1100 : /*
1101 : * Get an array to track all allocated blocks.
1102 : * We need it to handle errors and to free the
1103 : * allocated blocks on failure.
1104 : */
1105 0 : ablocks = kcalloc(depth, sizeof(ext4_fsblk_t), gfp_flags);
1106 0 : if (!ablocks)
1107 : return -ENOMEM;
1108 :
1109 : /* allocate all needed blocks */
1110 : ext_debug(inode, "allocate %d blocks for indexes/leaf\n", depth - at);
1111 0 : for (a = 0; a < depth - at; a++) {
1112 0 : newblock = ext4_ext_new_meta_block(handle, inode, path,
1113 : newext, &err, flags);
1114 0 : if (newblock == 0)
1115 0 : goto cleanup;
1116 0 : ablocks[a] = newblock;
1117 : }
1118 :
1119 : /* initialize new leaf */
1120 0 : newblock = ablocks[--a];
1121 0 : if (unlikely(newblock == 0)) {
1122 0 : EXT4_ERROR_INODE(inode, "newblock == 0!");
1123 0 : err = -EFSCORRUPTED;
1124 0 : goto cleanup;
1125 : }
1126 0 : bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
1127 0 : if (unlikely(!bh)) {
1128 0 : err = -ENOMEM;
1129 0 : goto cleanup;
1130 : }
1131 0 : lock_buffer(bh);
1132 :
1133 0 : err = ext4_journal_get_create_access(handle, inode->i_sb, bh,
1134 : EXT4_JTR_NONE);
1135 0 : if (err)
1136 0 : goto cleanup;
1137 :
1138 0 : neh = ext_block_hdr(bh);
1139 0 : neh->eh_entries = 0;
1140 0 : neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
1141 0 : neh->eh_magic = EXT4_EXT_MAGIC;
1142 0 : neh->eh_depth = 0;
1143 0 : neh->eh_generation = 0;
1144 :
1145 : /* move remainder of path[depth] to the new leaf */
1146 0 : if (unlikely(path[depth].p_hdr->eh_entries !=
1147 : path[depth].p_hdr->eh_max)) {
1148 0 : EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
1149 : path[depth].p_hdr->eh_entries,
1150 : path[depth].p_hdr->eh_max);
1151 0 : err = -EFSCORRUPTED;
1152 0 : goto cleanup;
1153 : }
1154 : /* start copy from next extent */
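/* m is the number of extents to the right of the split point that
 * will be moved into the freshly allocated leaf */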
1155 0 : m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++;
1156 0 : ext4_ext_show_move(inode, path, newblock, depth);
1157 0 : if (m) {
1158 0 : struct ext4_extent *ex;
1159 0 : ex = EXT_FIRST_EXTENT(neh);
1160 0 : memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m);
1161 0 : le16_add_cpu(&neh->eh_entries, m);
1162 : }
1163 :
1164 : /* zero out unused area in the extent block */
1165 0 : ext_size = sizeof(struct ext4_extent_header) +
1166 0 : sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries);
1167 0 : memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size);
1168 0 : ext4_extent_block_csum_set(inode, neh);
1169 0 : set_buffer_uptodate(bh);
1170 0 : unlock_buffer(bh);
1171 :
1172 0 : err = ext4_handle_dirty_metadata(handle, inode, bh);
1173 0 : if (err)
1174 0 : goto cleanup;
1175 0 : brelse(bh);
1176 0 : bh = NULL;
1177 :
1178 : /* correct old leaf */
1179 0 : if (m) {
1180 0 : err = ext4_ext_get_access(handle, inode, path + depth);
1181 0 : if (err)
1182 0 : goto cleanup;
1183 0 : le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
1184 0 : err = ext4_ext_dirty(handle, inode, path + depth);
1185 0 : if (err)
1186 0 : goto cleanup;
1187 :
1188 : }
1189 :
1190 : /* create intermediate indexes */
1191 0 : k = depth - at - 1;
1192 0 : if (unlikely(k < 0)) {
1193 0 : EXT4_ERROR_INODE(inode, "k %d < 0!", k);
1194 0 : err = -EFSCORRUPTED;
1195 0 : goto cleanup;
1196 : }
1197 0 : if (k)
1198 : ext_debug(inode, "create %d intermediate indices\n", k);
1199 : /* insert new index into current index block */
1200 : /* current depth stored in i var */
1201 0 : i = depth - 1;
1202 0 : while (k--) {
1203 0 : oldblock = newblock;
1204 0 : newblock = ablocks[--a];
1205 0 : bh = sb_getblk(inode->i_sb, newblock);
1206 0 : if (unlikely(!bh)) {
1207 0 : err = -ENOMEM;
1208 0 : goto cleanup;
1209 : }
1210 0 : lock_buffer(bh);
1211 :
1212 0 : err = ext4_journal_get_create_access(handle, inode->i_sb, bh,
1213 : EXT4_JTR_NONE);
1214 0 : if (err)
1215 0 : goto cleanup;
1216 :
1217 0 : neh = ext_block_hdr(bh);
1218 0 : neh->eh_entries = cpu_to_le16(1);
1219 0 : neh->eh_magic = EXT4_EXT_MAGIC;
1220 0 : neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
1221 0 : neh->eh_depth = cpu_to_le16(depth - i);
1222 0 : neh->eh_generation = 0;
1223 0 : fidx = EXT_FIRST_INDEX(neh);
1224 0 : fidx->ei_block = border;
1225 0 : ext4_idx_store_pblock(fidx, oldblock);
1226 :
1227 0 : ext_debug(inode, "int.index at %d (block %llu): %u -> %llu\n",
1228 : i, newblock, le32_to_cpu(border), oldblock);
1229 :
1230 : /* move remainder of path[i] to the new index block */
1231 0 : if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
1232 : EXT_LAST_INDEX(path[i].p_hdr))) {
1233 0 : EXT4_ERROR_INODE(inode,
1234 : "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
1235 : le32_to_cpu(path[i].p_ext->ee_block));
1236 0 : err = -EFSCORRUPTED;
1237 0 : goto cleanup;
1238 : }
1239 : /* start copy indexes */
1240 0 : m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++;
1241 0 : ext_debug(inode, "cur 0x%p, last 0x%p\n", path[i].p_idx,
1242 : EXT_MAX_INDEX(path[i].p_hdr));
1243 0 : ext4_ext_show_move(inode, path, newblock, i);
1244 0 : if (m) {
1245 0 : memmove(++fidx, path[i].p_idx,
1246 : sizeof(struct ext4_extent_idx) * m);
1247 0 : le16_add_cpu(&neh->eh_entries, m);
1248 : }
1249 : /* zero out unused area in the extent block */
1250 0 : ext_size = sizeof(struct ext4_extent_header) +
1251 0 : (sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries));
1252 0 : memset(bh->b_data + ext_size, 0,
1253 : inode->i_sb->s_blocksize - ext_size);
1254 0 : ext4_extent_block_csum_set(inode, neh);
1255 0 : set_buffer_uptodate(bh);
1256 0 : unlock_buffer(bh);
1257 :
1258 0 : err = ext4_handle_dirty_metadata(handle, inode, bh);
1259 0 : if (err)
1260 0 : goto cleanup;
1261 0 : brelse(bh);
1262 0 : bh = NULL;
1263 :
1264 : /* correct old index */
1265 0 : if (m) {
1266 0 : err = ext4_ext_get_access(handle, inode, path + i);
1267 0 : if (err)
1268 0 : goto cleanup;
1269 0 : le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
1270 0 : err = ext4_ext_dirty(handle, inode, path + i);
1271 0 : if (err)
1272 0 : goto cleanup;
1273 : }
1274 :
1275 0 : i--;
1276 : }
1277 :
1278 : /* insert new index */
1279 0 : err = ext4_ext_insert_index(handle, inode, path + at,
1280 : le32_to_cpu(border), newblock);
1281 :
1282 0 : cleanup:
1283 0 : if (bh) {
1284 0 : if (buffer_locked(bh))
1285 0 : unlock_buffer(bh);
1286 0 : brelse(bh);
1287 : }
1288 :
1289 0 : if (err) {
1290 : /* free all allocated blocks in error case */
1291 0 : for (i = 0; i < depth; i++) {
1292 0 : if (!ablocks[i])
1293 0 : continue;
1294 0 : ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
1295 : EXT4_FREE_BLOCKS_METADATA);
1296 : }
1297 : }
1298 0 : kfree(ablocks);
1299 :
1300 0 : return err;
1301 : }
1302 :
1303 : /*
1304 : * ext4_ext_grow_indepth:
1305 : * implements tree growing procedure:
1306 : * - allocates new block
1307 : * - moves top-level data (index block or leaf) into the new block
1308 : * - initializes new top-level, creating index that points to the
1309 : * just created block
1310 : */
1311 0 : static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1312 : unsigned int flags)
1313 : {
1314 0 : struct ext4_extent_header *neh;
1315 0 : struct buffer_head *bh;
1316 0 : ext4_fsblk_t newblock, goal = 0;
1317 0 : struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
1318 0 : int err = 0;
1319 0 : size_t ext_size = 0;
1320 :
1321 : /* Try to prepend new index to old one */
1322 0 : if (ext_depth(inode))
1323 0 : goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode)));
1324 0 : if (goal > le32_to_cpu(es->s_first_data_block)) {
1325 0 : flags |= EXT4_MB_HINT_TRY_GOAL;
1326 0 : goal--;
1327 : } else
1328 0 : goal = ext4_inode_to_goal_block(inode);
1329 0 : newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
1330 : NULL, &err);
1331 0 : if (newblock == 0)
1332 0 : return err;
1333 :
1334 0 : bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
1335 0 : if (unlikely(!bh))
1336 : return -ENOMEM;
1337 0 : lock_buffer(bh);
1338 :
1339 0 : err = ext4_journal_get_create_access(handle, inode->i_sb, bh,
1340 : EXT4_JTR_NONE);
1341 0 : if (err) {
1342 0 : unlock_buffer(bh);
1343 0 : goto out;
1344 : }
1345 :
1346 0 : ext_size = sizeof(EXT4_I(inode)->i_data);
1347 : /* move top-level index/leaf into new block */
1348 0 : memmove(bh->b_data, EXT4_I(inode)->i_data, ext_size);
1349 : /* zero out unused area in the extent block */
1350 0 : memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size);
1351 :
1352 : /* set size of new block */
1353 0 : neh = ext_block_hdr(bh);
1354 : /* the old root could have indexes or leaves,
1355 : * so calculate eh_max the right way */
1356 0 : if (ext_depth(inode))
1357 0 : neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
1358 : else
1359 0 : neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
1360 0 : neh->eh_magic = EXT4_EXT_MAGIC;
1361 0 : ext4_extent_block_csum_set(inode, neh);
1362 0 : set_buffer_uptodate(bh);
1363 0 : set_buffer_verified(bh);
1364 0 : unlock_buffer(bh);
1365 :
1366 0 : err = ext4_handle_dirty_metadata(handle, inode, bh);
1367 0 : if (err)
1368 0 : goto out;
1369 :
1370 : /* Update top-level index: num,max,pointer */
1371 0 : neh = ext_inode_hdr(inode);
1372 0 : neh->eh_entries = cpu_to_le16(1);
1373 0 : ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1374 0 : if (neh->eh_depth == 0) {
1375 : /* Root extent block becomes index block */
1376 0 : neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
1377 0 : EXT_FIRST_INDEX(neh)->ei_block =
1378 : EXT_FIRST_EXTENT(neh)->ee_block;
1379 : }
1380 0 : ext_debug(inode, "new root: num %d(%d), lblock %d, ptr %llu\n",
1381 : le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
1382 : le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
1383 : ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
1384 :
1385 0 : le16_add_cpu(&neh->eh_depth, 1);
1386 0 : err = ext4_mark_inode_dirty(handle, inode);
1387 0 : out:
1388 0 : brelse(bh);
1389 :
1390 0 : return err;
1391 : }
1392 :
1393 : /*
1394 : * ext4_ext_create_new_leaf:
1395 : * finds an empty index and adds a new leaf.
1396 : * if no free index is found, it grows the tree in depth instead.
1397 : */
1398 0 : static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
1399 : unsigned int mb_flags,
1400 : unsigned int gb_flags,
1401 : struct ext4_ext_path **ppath,
1402 : struct ext4_extent *newext)
1403 : {
1404 0 : struct ext4_ext_path *path = *ppath;
1405 0 : struct ext4_ext_path *curp;
1406 0 : int depth, i, err = 0;
1407 :
1408 0 : repeat:
1409 0 : i = depth = ext_depth(inode);
1410 :
1411 : /* walk up to the tree and look for free index entry */
1412 0 : curp = path + depth;
1413 0 : while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
1414 0 : i--;
1415 0 : curp--;
1416 : }
1417 :
1418 : /* we use an already allocated block for the index block,
1419 : * so subsequent data blocks should be contiguous */
1420 0 : if (EXT_HAS_FREE_INDEX(curp)) {
1421 : /* if we found index with free entry, then use that
1422 : * entry: create all needed subtree and add new leaf */
1423 0 : err = ext4_ext_split(handle, inode, mb_flags, path, newext, i);
1424 0 : if (err)
1425 0 : goto out;
1426 :
1427 : /* refill path */
1428 0 : path = ext4_find_extent(inode,
1429 0 : (ext4_lblk_t)le32_to_cpu(newext->ee_block),
1430 : ppath, gb_flags);
1431 0 : if (IS_ERR(path))
1432 0 : err = PTR_ERR(path);
1433 : } else {
1434 : /* tree is full, time to grow in depth */
1435 0 : err = ext4_ext_grow_indepth(handle, inode, mb_flags);
1436 0 : if (err)
1437 0 : goto out;
1438 :
1439 : /* refill path */
1440 0 : path = ext4_find_extent(inode,
1441 0 : (ext4_lblk_t)le32_to_cpu(newext->ee_block),
1442 : ppath, gb_flags);
1443 0 : if (IS_ERR(path)) {
1444 0 : err = PTR_ERR(path);
1445 0 : goto out;
1446 : }
1447 :
1448 : /*
1449 : * only the first grow (depth 0 -> 1) produces free space;
1450 : * in all other cases we have to split the grown tree
1451 : */
1452 0 : depth = ext_depth(inode);
1453 0 : if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
1454 : /* now we need to split */
1455 0 : goto repeat;
1456 : }
1457 : }
1458 :
1459 0 : out:
1460 0 : return err;
1461 : }
1462 :
1463 : /*
1464 : * search the closest allocated block to the left for *logical
1465 : * and returns it at @logical + its physical address at @phys
1466 : * if *logical is the smallest allocated block, the function
1467 : * returns 0 at @phys
1468 : * return value contains 0 (success) or error code
1469 : */
1470 0 : static int ext4_ext_search_left(struct inode *inode,
1471 : struct ext4_ext_path *path,
1472 : ext4_lblk_t *logical, ext4_fsblk_t *phys)
1473 : {
1474 0 : struct ext4_extent_idx *ix;
1475 0 : struct ext4_extent *ex;
1476 0 : int depth, ee_len;
1477 :
1478 0 : if (unlikely(path == NULL)) {
1479 0 : EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
1480 0 : return -EFSCORRUPTED;
1481 : }
1482 0 : depth = path->p_depth;
1483 0 : *phys = 0;
1484 :
1485 0 : if (depth == 0 && path->p_ext == NULL)
1486 : return 0;
1487 :
1488 : /* usually the extent in the path covers blocks smaller
1489 : * than *logical, but it can be that the extent is the
1490 : * first one in the file */
1491 :
1492 0 : ex = path[depth].p_ext;
1493 0 : ee_len = ext4_ext_get_actual_len(ex);
1494 0 : if (*logical < le32_to_cpu(ex->ee_block)) {
1495 0 : if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
1496 0 : EXT4_ERROR_INODE(inode,
1497 : "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
1498 : *logical, le32_to_cpu(ex->ee_block));
1499 0 : return -EFSCORRUPTED;
1500 : }
1501 0 : while (--depth >= 0) {
1502 0 : ix = path[depth].p_idx;
1503 0 : if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
1504 0 : EXT4_ERROR_INODE(inode,
1505 : "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
1506 : ix != NULL ? le32_to_cpu(ix->ei_block) : 0,
1507 : le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block),
1508 : depth);
1509 0 : return -EFSCORRUPTED;
1510 : }
1511 : }
1512 : return 0;
1513 : }
1514 :
1515 0 : if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
1516 0 : EXT4_ERROR_INODE(inode,
1517 : "logical %d < ee_block %d + ee_len %d!",
1518 : *logical, le32_to_cpu(ex->ee_block), ee_len);
1519 0 : return -EFSCORRUPTED;
1520 : }
1521 :
1522 0 : *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
1523 0 : *phys = ext4_ext_pblock(ex) + ee_len - 1;
1524 0 : return 0;
1525 : }
1526 :
1527 : /*
1528 : * Search the closest allocated block to the right for *logical
1529 : * and returns it at @logical + its physical address at @phys.
1530 : * If none exists, return 0 and @phys is set to 0. Otherwise return
1531 : * 1, which means we found an allocated block and ret_ex is valid.
1532 : * Or return a (< 0) error code.
1533 : */
1534 0 : static int ext4_ext_search_right(struct inode *inode,
1535 : struct ext4_ext_path *path,
1536 : ext4_lblk_t *logical, ext4_fsblk_t *phys,
1537 : struct ext4_extent *ret_ex)
1538 : {
1539 0 : struct buffer_head *bh = NULL;
1540 0 : struct ext4_extent_header *eh;
1541 0 : struct ext4_extent_idx *ix;
1542 0 : struct ext4_extent *ex;
1543 0 : int depth; /* Note, NOT eh_depth; depth from top of tree */
1544 0 : int ee_len;
1545 :
1546 0 : if (unlikely(path == NULL)) {
1547 0 : EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
1548 0 : return -EFSCORRUPTED;
1549 : }
1550 0 : depth = path->p_depth;
1551 0 : *phys = 0;
1552 :
1553 0 : if (depth == 0 && path->p_ext == NULL)
1554 : return 0;
1555 :
1556 : /* usually the extent in the path covers blocks smaller
1557 : * than *logical, but it can be that the extent is the
1558 : * first one in the file */
1559 :
1560 0 : ex = path[depth].p_ext;
1561 0 : ee_len = ext4_ext_get_actual_len(ex);
1562 0 : if (*logical < le32_to_cpu(ex->ee_block)) {
1563 0 : if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
1564 0 : EXT4_ERROR_INODE(inode,
1565 : "first_extent(path[%d].p_hdr) != ex",
1566 : depth);
1567 0 : return -EFSCORRUPTED;
1568 : }
1569 0 : while (--depth >= 0) {
1570 0 : ix = path[depth].p_idx;
1571 0 : if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
1572 0 : EXT4_ERROR_INODE(inode,
1573 : "ix != EXT_FIRST_INDEX *logical %d!",
1574 : *logical);
1575 0 : return -EFSCORRUPTED;
1576 : }
1577 : }
1578 0 : goto found_extent;
1579 : }
1580 :
1581 0 : if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
1582 0 : EXT4_ERROR_INODE(inode,
1583 : "logical %d < ee_block %d + ee_len %d!",
1584 : *logical, le32_to_cpu(ex->ee_block), ee_len);
1585 0 : return -EFSCORRUPTED;
1586 : }
1587 :
1588 0 : if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
1589 : /* next allocated block in this leaf */
1590 0 : ex++;
1591 0 : goto found_extent;
1592 : }
1593 :
1594 : /* go up and search for index to the right */
1595 0 : while (--depth >= 0) {
1596 0 : ix = path[depth].p_idx;
1597 0 : if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
1598 0 : goto got_index;
1599 : }
1600 :
1601 : /* we've gone up to the root and found no index to the right */
1602 : return 0;
1603 :
1604 : got_index:
1605 : /* we've found index to the right, let's
1606 : * follow it and find the closest allocated
1607 : * block to the right */
1608 0 : ix++;
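/* walk down the leftmost branch of the subtree to the right until
 * we reach its first leaf */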
1609 0 : while (++depth < path->p_depth) {
1610 : /* subtract from p_depth to get proper eh_depth */
1611 0 : bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0);
1612 0 : if (IS_ERR(bh))
1613 0 : return PTR_ERR(bh);
1614 0 : eh = ext_block_hdr(bh);
1615 0 : ix = EXT_FIRST_INDEX(eh);
1616 0 : put_bh(bh);
1617 : }
1618 :
1619 0 : bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0);
1620 0 : if (IS_ERR(bh))
1621 0 : return PTR_ERR(bh);
1622 0 : eh = ext_block_hdr(bh);
1623 0 : ex = EXT_FIRST_EXTENT(eh);
1624 0 : found_extent:
1625 0 : *logical = le32_to_cpu(ex->ee_block);
1626 0 : *phys = ext4_ext_pblock(ex);
1627 0 : if (ret_ex)
1628 0 : *ret_ex = *ex;
1629 0 : if (bh)
1630 0 : put_bh(bh);
1631 : return 1;
1632 : }
1633 :
1634 : /*
1635 : * ext4_ext_next_allocated_block:
1636 : * returns allocated block in subsequent extent or EXT_MAX_BLOCKS.
1637 : * NOTE: it considers block number from index entry as
1638 : * allocated block. Thus, index entries have to be consistent
1639 : * with leaves.
1640 : */
1641 : ext4_lblk_t
1642 0 : ext4_ext_next_allocated_block(struct ext4_ext_path *path)
1643 : {
1644 0 : int depth;
1645 :
1646 0 : BUG_ON(path == NULL);
1647 0 : depth = path->p_depth;
1648 :
1649 0 : if (depth == 0 && path->p_ext == NULL)
1650 : return EXT_MAX_BLOCKS;
1651 :
1652 0 : while (depth >= 0) {
1653 0 : struct ext4_ext_path *p = &path[depth];
1654 :
1655 0 : if (depth == path->p_depth) {
1656 : /* leaf */
1657 0 : if (p->p_ext && p->p_ext != EXT_LAST_EXTENT(p->p_hdr))
1658 0 : return le32_to_cpu(p->p_ext[1].ee_block);
1659 : } else {
1660 : /* index */
1661 0 : if (p->p_idx != EXT_LAST_INDEX(p->p_hdr))
1662 0 : return le32_to_cpu(p->p_idx[1].ei_block);
1663 : }
1664 0 : depth--;
1665 : }
1666 :
1667 : return EXT_MAX_BLOCKS;
1668 : }
1669 :
1670 : /*
1671 : * ext4_ext_next_leaf_block:
1672 : * returns first allocated block from next leaf or EXT_MAX_BLOCKS
1673 : */
1674 0 : static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
1675 : {
1676 0 : int depth;
1677 :
1678 0 : BUG_ON(path == NULL);
1679 0 : depth = path->p_depth;
1680 :
1681 : /* zero-tree has no leaf blocks at all */
1682 0 : if (depth == 0)
1683 : return EXT_MAX_BLOCKS;
1684 :
1685 : /* go to index block */
1686 0 : depth--;
1687 :
1688 0 : while (depth >= 0) {
1689 0 : if (path[depth].p_idx !=
1690 0 : EXT_LAST_INDEX(path[depth].p_hdr))
1691 0 : return (ext4_lblk_t)
1692 : le32_to_cpu(path[depth].p_idx[1].ei_block);
1693 0 : depth--;
1694 : }
1695 :
1696 : return EXT_MAX_BLOCKS;
1697 : }
1698 :
1699 : /*
1700 : * ext4_ext_correct_indexes:
1701 : * if leaf gets modified and modified extent is first in the leaf,
1702 : * then we have to correct all indexes above.
1703 : * TODO: do we need to correct tree in all cases?
1704 : */
1705 0 : static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
1706 : struct ext4_ext_path *path)
1707 : {
1708 0 : struct ext4_extent_header *eh;
1709 0 : int depth = ext_depth(inode);
1710 0 : struct ext4_extent *ex;
1711 0 : __le32 border;
1712 0 : int k, err = 0;
1713 :
1714 0 : eh = path[depth].p_hdr;
1715 0 : ex = path[depth].p_ext;
1716 :
1717 0 : if (unlikely(ex == NULL || eh == NULL)) {
1718 0 : EXT4_ERROR_INODE(inode,
1719 : "ex %p == NULL or eh %p == NULL", ex, eh);
1720 0 : return -EFSCORRUPTED;
1721 : }
1722 :
1723 0 : if (depth == 0) {
1724 : /* there is no tree at all */
1725 : return 0;
1726 : }
1727 :
1728 0 : if (ex != EXT_FIRST_EXTENT(eh)) {
1729 : /* we correct tree if first leaf got modified only */
1730 : return 0;
1731 : }
1732 :
1733 : /*
1734 : * TODO: we need correction if border is smaller than current one
1735 : */
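/* update the parent index first, then propagate the new border up the
 * tree for as long as the modified entry is the first one at its level */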
1736 0 : k = depth - 1;
1737 0 : border = path[depth].p_ext->ee_block;
1738 0 : err = ext4_ext_get_access(handle, inode, path + k);
1739 0 : if (err)
1740 : return err;
1741 0 : path[k].p_idx->ei_block = border;
1742 0 : err = ext4_ext_dirty(handle, inode, path + k);
1743 0 : if (err)
1744 : return err;
1745 :
1746 0 : while (k--) {
1747 : /* change all left-side indexes */
1748 0 : if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
1749 : break;
1750 0 : err = ext4_ext_get_access(handle, inode, path + k);
1751 0 : if (err)
1752 : break;
1753 0 : path[k].p_idx->ei_block = border;
1754 0 : err = ext4_ext_dirty(handle, inode, path + k);
1755 0 : if (err)
1756 : break;
1757 : }
1758 :
1759 : return err;
1760 : }
1761 :
1762 0 : static int ext4_can_extents_be_merged(struct inode *inode,
1763 : struct ext4_extent *ex1,
1764 : struct ext4_extent *ex2)
1765 : {
1766 0 : unsigned short ext1_ee_len, ext2_ee_len;
1767 :
1768 0 : if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2))
1769 : return 0;
1770 :
1771 0 : ext1_ee_len = ext4_ext_get_actual_len(ex1);
1772 0 : ext2_ee_len = ext4_ext_get_actual_len(ex2);
1773 :
1774 0 : if (le32_to_cpu(ex1->ee_block) + ext1_ee_len !=
1775 0 : le32_to_cpu(ex2->ee_block))
1776 : return 0;
1777 :
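/* the combined length must still fit in the on-disk extent length field */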
1778 0 : if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
1779 : return 0;
1780 :
1781 0 : if (ext4_ext_is_unwritten(ex1) &&
1782 : ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)
1783 : return 0;
1784 : #ifdef AGGRESSIVE_TEST
1785 : if (ext1_ee_len >= 4)
1786 : return 0;
1787 : #endif
1788 :
1789 0 : if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
1790 0 : return 1;
1791 : return 0;
1792 : }
1793 :
1794 : /*
1795 : * This function tries to merge the "ex" extent to the next extent in the tree.
1796 : * It always tries to merge towards right. If you want to merge towards
1797 : * left, pass "ex - 1" as argument instead of "ex".
1798 : * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
1799 : * 1 if they got merged.
1800 : */
1801 0 : static int ext4_ext_try_to_merge_right(struct inode *inode,
1802 : struct ext4_ext_path *path,
1803 : struct ext4_extent *ex)
1804 : {
1805 0 : struct ext4_extent_header *eh;
1806 0 : unsigned int depth, len;
1807 0 : int merge_done = 0, unwritten;
1808 :
1809 0 : depth = ext_depth(inode);
1810 0 : BUG_ON(path[depth].p_hdr == NULL);
1811 : eh = path[depth].p_hdr;
1812 :
1813 0 : while (ex < EXT_LAST_EXTENT(eh)) {
1814 0 : if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
1815 : break;
1816 : /* merge with next extent! */
1817 0 : unwritten = ext4_ext_is_unwritten(ex);
1818 0 : ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
1819 : + ext4_ext_get_actual_len(ex + 1));
1820 0 : if (unwritten)
1821 0 : ext4_ext_mark_unwritten(ex);
1822 :
1823 0 : if (ex + 1 < EXT_LAST_EXTENT(eh)) {
1824 0 : len = (EXT_LAST_EXTENT(eh) - ex - 1)
1825 0 : * sizeof(struct ext4_extent);
1826 0 : memmove(ex + 1, ex + 2, len);
1827 : }
1828 0 : le16_add_cpu(&eh->eh_entries, -1);
1829 0 : merge_done = 1;
1830 0 : WARN_ON(eh->eh_entries == 0);
1831 0 : if (!eh->eh_entries)
1832 0 : EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");
1833 : }
1834 :
1835 0 : return merge_done;
1836 : }
1837 :
1838 : /*
1839 : * This function does a very simple check to see if we can collapse
1840 : * an extent tree with a single extent tree leaf block into the inode.
1841 : */
1842 0 : static void ext4_ext_try_to_merge_up(handle_t *handle,
1843 : struct inode *inode,
1844 : struct ext4_ext_path *path)
1845 : {
1846 0 : size_t s;
1847 0 : unsigned max_root = ext4_ext_space_root(inode, 0);
1848 0 : ext4_fsblk_t blk;
1849 :
1850 0 : if ((path[0].p_depth != 1) ||
1851 0 : (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
1852 0 : (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
1853 : return;
1854 :
1855 : /*
1856 : * We need to modify the block allocation bitmap and the block
1857 : * group descriptor to release the extent tree block. If we
1858 : * can't get the journal credits, give up.
1859 : */
1860 0 : if (ext4_journal_extend(handle, 2,
1861 : ext4_free_metadata_revoke_credits(inode->i_sb, 1)))
1862 : return;
1863 :
1864 : /*
1865 : * Copy the extent data up to the inode
1866 : */
1867 0 : blk = ext4_idx_pblock(path[0].p_idx);
1868 0 : s = le16_to_cpu(path[1].p_hdr->eh_entries) *
1869 : sizeof(struct ext4_extent_idx);
1870 0 : s += sizeof(struct ext4_extent_header);
1871 :
1872 0 : path[1].p_maxdepth = path[0].p_maxdepth;
1873 0 : memcpy(path[0].p_hdr, path[1].p_hdr, s);
1874 0 : path[0].p_depth = 0;
1875 0 : path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
1876 0 : (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
1877 0 : path[0].p_hdr->eh_max = cpu_to_le16(max_root);
1878 :
1879 0 : brelse(path[1].p_bh);
1880 0 : ext4_free_blocks(handle, inode, NULL, blk, 1,
1881 : EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
1882 : }
1883 :
1884 : /*
1885 : * This function tries to merge the @ex extent to neighbours in the tree, then
1886 : * tries to collapse the extent tree into the inode.
1887 : */
1888 0 : static void ext4_ext_try_to_merge(handle_t *handle,
1889 : struct inode *inode,
1890 : struct ext4_ext_path *path,
1891 : struct ext4_extent *ex)
1892 : {
1893 0 : struct ext4_extent_header *eh;
1894 0 : unsigned int depth;
1895 0 : int merge_done = 0;
1896 :
1897 0 : depth = ext_depth(inode);
1898 0 : BUG_ON(path[depth].p_hdr == NULL);
1899 0 : eh = path[depth].p_hdr;
1900 :
1901 0 : if (ex > EXT_FIRST_EXTENT(eh))
1902 0 : merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
1903 :
1904 0 : if (!merge_done)
1905 0 : (void) ext4_ext_try_to_merge_right(inode, path, ex);
1906 :
1907 0 : ext4_ext_try_to_merge_up(handle, inode, path);
1908 0 : }
1909 :
1910 : /*
1911 : * check if a portion of the "newext" extent overlaps with an
1912 : * existing extent.
1913 : *
1914 : * If there is an overlap discovered, it updates the length of the newext
1915 : * such that there will be no overlap, and then returns 1.
1916 : * If there is no overlap found, it returns 0.
1917 : */
1918 0 : static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
1919 : struct inode *inode,
1920 : struct ext4_extent *newext,
1921 : struct ext4_ext_path *path)
1922 : {
1923 0 : ext4_lblk_t b1, b2;
1924 0 : unsigned int depth, len1;
1925 0 : unsigned int ret = 0;
1926 :
1927 0 : b1 = le32_to_cpu(newext->ee_block);
1928 0 : len1 = ext4_ext_get_actual_len(newext);
1929 0 : depth = ext_depth(inode);
1930 0 : if (!path[depth].p_ext)
1931 0 : goto out;
1932 0 : b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block));
1933 :
1934 : /*
1935 : * get the next allocated block if the extent in the path
1936 : * is before the requested block(s)
1937 : */
1938 0 : if (b2 < b1) {
1939 0 : b2 = ext4_ext_next_allocated_block(path);
1940 0 : if (b2 == EXT_MAX_BLOCKS)
1941 0 : goto out;
1942 0 : b2 = EXT4_LBLK_CMASK(sbi, b2);
1943 : }
1944 :
1945 : /* check for wrap through zero on extent logical start block*/
1946 0 : if (b1 + len1 < b1) {
1947 0 : len1 = EXT_MAX_BLOCKS - b1;
1948 0 : newext->ee_len = cpu_to_le16(len1);
1949 0 : ret = 1;
1950 : }
1951 :
1952 : /* check for overlap */
1953 0 : if (b1 + len1 > b2) {
1954 0 : newext->ee_len = cpu_to_le16(b2 - b1);
1955 0 : ret = 1;
1956 : }
1957 0 : out:
1958 0 : return ret;
1959 : }
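/*
 * Illustrative sketch (editor's addition, not part of ext4): the check above
 * trims the length of a candidate extent so that it stops at the next block
 * that is already allocated. A simplified, standalone version of that
 * trimming, assuming next_allocated >= start; DEMO_MAX_BLOCKS is a
 * hypothetical stand-in for EXT_MAX_BLOCKS.
 */
#define DEMO_MAX_BLOCKS	0xffffffffU

static unsigned int demo_trim_to_next_allocated(unsigned int start,
						unsigned int len,
						unsigned int next_allocated)
{
	/* clamp a wrap through zero on the logical start block */
	if (start + len < start)
		len = DEMO_MAX_BLOCKS - start;
	/* stop the candidate extent at the next allocated block */
	if (start + len > next_allocated)
		len = next_allocated - start;
	return len;
}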
1960 :
1961 : /*
1962 : * ext4_ext_insert_extent:
1963 : * tries to merge the requested extent into an existing extent, or
1964 : * inserts the requested extent as a new one into the tree,
1965 : * creating a new leaf in the no-space case.
1966 : */
1967 0 : int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1968 : struct ext4_ext_path **ppath,
1969 : struct ext4_extent *newext, int gb_flags)
1970 : {
1971 0 : struct ext4_ext_path *path = *ppath;
1972 0 : struct ext4_extent_header *eh;
1973 0 : struct ext4_extent *ex, *fex;
1974 0 : struct ext4_extent *nearex; /* nearest extent */
1975 0 : struct ext4_ext_path *npath = NULL;
1976 0 : int depth, len, err;
1977 0 : ext4_lblk_t next;
1978 0 : int mb_flags = 0, unwritten;
1979 :
1980 0 : if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
1981 0 : mb_flags |= EXT4_MB_DELALLOC_RESERVED;
1982 0 : if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
1983 0 : EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
1984 0 : return -EFSCORRUPTED;
1985 : }
1986 0 : depth = ext_depth(inode);
1987 0 : ex = path[depth].p_ext;
1988 0 : eh = path[depth].p_hdr;
1989 0 : if (unlikely(path[depth].p_hdr == NULL)) {
1990 0 : EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
1991 0 : return -EFSCORRUPTED;
1992 : }
1993 :
1994 : /* try to insert block into found extent and return */
1995 0 : if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) {
1996 :
1997 : /*
1998 : * Try to see whether we should rather test the extent on
1999 : * right from ex, or from the left of ex. This is because
2000 : * ext4_find_extent() can return either extent on the
2001 : * left, or on the right from the searched position. This
2002 : * will make merging more effective.
2003 : */
2004 0 : if (ex < EXT_LAST_EXTENT(eh) &&
2005 0 : (le32_to_cpu(ex->ee_block) +
2006 0 : ext4_ext_get_actual_len(ex) <
2007 0 : le32_to_cpu(newext->ee_block))) {
2008 0 : ex += 1;
2009 0 : goto prepend;
2010 0 : } else if ((ex > EXT_FIRST_EXTENT(eh)) &&
2011 0 : (le32_to_cpu(newext->ee_block) +
2012 0 : ext4_ext_get_actual_len(newext) <
2013 0 : le32_to_cpu(ex->ee_block)))
2014 0 : ex -= 1;
2015 :
2016 : /* Try to append newex to the ex */
2017 0 : if (ext4_can_extents_be_merged(inode, ex, newext)) {
2018 0 : ext_debug(inode, "append [%d]%d block to %u:[%d]%d"
2019 : "(from %llu)\n",
2020 : ext4_ext_is_unwritten(newext),
2021 : ext4_ext_get_actual_len(newext),
2022 : le32_to_cpu(ex->ee_block),
2023 : ext4_ext_is_unwritten(ex),
2024 : ext4_ext_get_actual_len(ex),
2025 : ext4_ext_pblock(ex));
2026 0 : err = ext4_ext_get_access(handle, inode,
2027 : path + depth);
2028 0 : if (err)
2029 : return err;
2030 0 : unwritten = ext4_ext_is_unwritten(ex);
2031 0 : ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
2032 : + ext4_ext_get_actual_len(newext));
2033 0 : if (unwritten)
2034 0 : ext4_ext_mark_unwritten(ex);
2035 0 : nearex = ex;
2036 0 : goto merge;
2037 : }
2038 :
2039 0 : prepend:
2040 : /* Try to prepend newex to the ex */
2041 0 : if (ext4_can_extents_be_merged(inode, newext, ex)) {
2042 0 : ext_debug(inode, "prepend %u[%d]%d block to %u:[%d]%d"
2043 : "(from %llu)\n",
2044 : le32_to_cpu(newext->ee_block),
2045 : ext4_ext_is_unwritten(newext),
2046 : ext4_ext_get_actual_len(newext),
2047 : le32_to_cpu(ex->ee_block),
2048 : ext4_ext_is_unwritten(ex),
2049 : ext4_ext_get_actual_len(ex),
2050 : ext4_ext_pblock(ex));
2051 0 : err = ext4_ext_get_access(handle, inode,
2052 : path + depth);
2053 0 : if (err)
2054 : return err;
2055 :
2056 0 : unwritten = ext4_ext_is_unwritten(ex);
2057 0 : ex->ee_block = newext->ee_block;
2058 0 : ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
2059 0 : ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
2060 : + ext4_ext_get_actual_len(newext));
2061 0 : if (unwritten)
2062 0 : ext4_ext_mark_unwritten(ex);
2063 0 : nearex = ex;
2064 0 : goto merge;
2065 : }
2066 : }
2067 :
2068 0 : depth = ext_depth(inode);
2069 0 : eh = path[depth].p_hdr;
2070 0 : if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
2071 0 : goto has_space;
2072 :
2073 : /* probably next leaf has space for us? */
2074 0 : fex = EXT_LAST_EXTENT(eh);
2075 0 : next = EXT_MAX_BLOCKS;
2076 0 : if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
2077 0 : next = ext4_ext_next_leaf_block(path);
2078 0 : if (next != EXT_MAX_BLOCKS) {
2079 0 : ext_debug(inode, "next leaf block - %u\n", next);
2080 0 : BUG_ON(npath != NULL);
2081 0 : npath = ext4_find_extent(inode, next, NULL, gb_flags);
2082 0 : if (IS_ERR(npath))
2083 0 : return PTR_ERR(npath);
2084 0 : BUG_ON(npath->p_depth != path->p_depth);
2085 0 : eh = npath[depth].p_hdr;
2086 0 : if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
2087 0 : ext_debug(inode, "next leaf isn't full(%d)\n",
2088 : le16_to_cpu(eh->eh_entries));
2089 0 : path = npath;
2090 0 : goto has_space;
2091 : }
2092 : ext_debug(inode, "next leaf has no free space(%d,%d)\n",
2093 : le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
2094 : }
2095 :
2096 : /*
2097 : * There is no free space in the found leaf.
2098 : * We're gonna add a new leaf in the tree.
2099 : */
2100 0 : if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
2101 0 : mb_flags |= EXT4_MB_USE_RESERVED;
2102 0 : err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
2103 : ppath, newext);
2104 0 : if (err)
2105 0 : goto cleanup;
2106 0 : depth = ext_depth(inode);
2107 0 : eh = path[depth].p_hdr;
2108 :
2109 0 : has_space:
2110 0 : nearex = path[depth].p_ext;
2111 :
2112 0 : err = ext4_ext_get_access(handle, inode, path + depth);
2113 0 : if (err)
2114 0 : goto cleanup;
2115 :
2116 0 : if (!nearex) {
2117 : /* there is no extent in this leaf, create first one */
2118 0 : ext_debug(inode, "first extent in the leaf: %u:%llu:[%d]%d\n",
2119 : le32_to_cpu(newext->ee_block),
2120 : ext4_ext_pblock(newext),
2121 : ext4_ext_is_unwritten(newext),
2122 : ext4_ext_get_actual_len(newext));
2123 0 : nearex = EXT_FIRST_EXTENT(eh);
2124 : } else {
2125 0 : if (le32_to_cpu(newext->ee_block)
2126 0 : > le32_to_cpu(nearex->ee_block)) {
2127 : /* Insert after */
2128 0 : ext_debug(inode, "insert %u:%llu:[%d]%d before: "
2129 : "nearest %p\n",
2130 : le32_to_cpu(newext->ee_block),
2131 : ext4_ext_pblock(newext),
2132 : ext4_ext_is_unwritten(newext),
2133 : ext4_ext_get_actual_len(newext),
2134 : nearex);
2135 0 : nearex++;
2136 : } else {
2137 : /* Insert before */
2138 0 : BUG_ON(newext->ee_block == nearex->ee_block);
2139 : ext_debug(inode, "insert %u:%llu:[%d]%d after: "
2140 : "nearest %p\n",
2141 : le32_to_cpu(newext->ee_block),
2142 : ext4_ext_pblock(newext),
2143 : ext4_ext_is_unwritten(newext),
2144 : ext4_ext_get_actual_len(newext),
2145 : nearex);
2146 : }
2147 0 : len = EXT_LAST_EXTENT(eh) - nearex + 1;
2148 0 : if (len > 0) {
2149 0 : ext_debug(inode, "insert %u:%llu:[%d]%d: "
2150 : "move %d extents from 0x%p to 0x%p\n",
2151 : le32_to_cpu(newext->ee_block),
2152 : ext4_ext_pblock(newext),
2153 : ext4_ext_is_unwritten(newext),
2154 : ext4_ext_get_actual_len(newext),
2155 : len, nearex, nearex + 1);
2156 0 : memmove(nearex + 1, nearex,
2157 : len * sizeof(struct ext4_extent));
2158 : }
2159 : }
2160 :
2161 0 : le16_add_cpu(&eh->eh_entries, 1);
2162 0 : path[depth].p_ext = nearex;
2163 0 : nearex->ee_block = newext->ee_block;
2164 0 : ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
2165 0 : nearex->ee_len = newext->ee_len;
2166 :
2167 0 : merge:
2168 : /* try to merge extents */
2169 0 : if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO))
2170 0 : ext4_ext_try_to_merge(handle, inode, path, nearex);
2171 :
2172 :
2173 : /* time to correct all indexes above */
2174 0 : err = ext4_ext_correct_indexes(handle, inode, path);
2175 0 : if (err)
2176 0 : goto cleanup;
2177 :
2178 0 : err = ext4_ext_dirty(handle, inode, path + path->p_depth);
2179 :
2180 0 : cleanup:
2181 0 : ext4_free_ext_path(npath);
2182 0 : return err;
2183 : }
2184 :
2185 0 : static int ext4_fill_es_cache_info(struct inode *inode,
2186 : ext4_lblk_t block, ext4_lblk_t num,
2187 : struct fiemap_extent_info *fieinfo)
2188 : {
2189 0 : ext4_lblk_t next, end = block + num - 1;
2190 0 : struct extent_status es;
2191 0 : unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
2192 0 : unsigned int flags;
2193 0 : int err;
2194 :
2195 0 : while (block <= end) {
2196 0 : next = 0;
2197 0 : flags = 0;
2198 0 : if (!ext4_es_lookup_extent(inode, block, &next, &es))
2199 : break;
2200 0 : if (ext4_es_is_unwritten(&es))
2201 0 : flags |= FIEMAP_EXTENT_UNWRITTEN;
2202 0 : if (ext4_es_is_delayed(&es))
2203 0 : flags |= (FIEMAP_EXTENT_DELALLOC |
2204 : FIEMAP_EXTENT_UNKNOWN);
2205 0 : if (ext4_es_is_hole(&es))
2206 0 : flags |= EXT4_FIEMAP_EXTENT_HOLE;
2207 0 : if (next == 0)
2208 0 : flags |= FIEMAP_EXTENT_LAST;
2209 0 : if (flags & (FIEMAP_EXTENT_DELALLOC|
2210 : EXT4_FIEMAP_EXTENT_HOLE))
2211 0 : es.es_pblk = 0;
2212 : else
2213 0 : es.es_pblk = ext4_es_pblock(&es);
2214 0 : err = fiemap_fill_next_extent(fieinfo,
2215 0 : (__u64)es.es_lblk << blksize_bits,
2216 0 : (__u64)es.es_pblk << blksize_bits,
2217 0 : (__u64)es.es_len << blksize_bits,
2218 : flags);
2219 0 : if (next == 0)
2220 : break;
2221 0 : block = next;
2222 0 : if (err < 0)
2223 0 : return err;
2224 0 : if (err == 1)
2225 : return 0;
2226 : }
2227 : return 0;
2228 : }
2229 :
2230 :
2231 : /*
2232 : * ext4_ext_determine_hole - determine hole around given block
2233 : * @inode: inode we lookup in
2234 : * @path: path in extent tree to @lblk
2235 : * @lblk: pointer to logical block around which we want to determine hole
2236 : *
2237 : * Determine hole length (and start if easily possible) around given logical
2238 : * block. We don't try too hard to find the beginning of the hole but @path
2239 : * block. We don't try too hard to find the beginning of the hole, but if @path
2240 : * actually points to the extent before @lblk, we provide it.
2241 : * The function returns the length of a hole starting at @lblk. We update @lblk
2242 : * to the beginning of the hole if we managed to find it.
2243 : */
2244 0 : static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
2245 : struct ext4_ext_path *path,
2246 : ext4_lblk_t *lblk)
2247 : {
2248 0 : int depth = ext_depth(inode);
2249 0 : struct ext4_extent *ex;
2250 0 : ext4_lblk_t len;
2251 :
2252 0 : ex = path[depth].p_ext;
2253 0 : if (ex == NULL) {
2254 : /* there is no extent yet, so gap is [0;-] */
2255 0 : *lblk = 0;
2256 0 : len = EXT_MAX_BLOCKS;
2257 0 : } else if (*lblk < le32_to_cpu(ex->ee_block)) {
2258 0 : len = le32_to_cpu(ex->ee_block) - *lblk;
2259 0 : } else if (*lblk >= le32_to_cpu(ex->ee_block)
2260 0 : + ext4_ext_get_actual_len(ex)) {
2261 0 : ext4_lblk_t next;
2262 :
2263 0 : *lblk = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
2264 0 : next = ext4_ext_next_allocated_block(path);
2265 0 : BUG_ON(next == *lblk);
2266 0 : len = next - *lblk;
2267 : } else {
2268 0 : BUG();
2269 : }
2270 0 : return len;
2271 : }
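/*
 * Illustrative sketch (editor's addition, not part of ext4): the three cases
 * handled above, as a simplified standalone helper. ex_start/ex_len describe
 * the extent found by the lookup (has_extent == 0 if there is none),
 * next_allocated is the first allocated block to the right of that extent,
 * and *hole_start plus the return value mirror *lblk and the returned hole
 * length of the function above. All names here are hypothetical; the case
 * where lblk falls inside the found extent is a caller bug and is omitted.
 */
static unsigned int demo_determine_hole(int has_extent,
					unsigned int ex_start,
					unsigned int ex_len,
					unsigned int next_allocated,
					unsigned int lblk,
					unsigned int *hole_start)
{
	if (!has_extent) {
		/* no extents at all: the whole logical range is a hole */
		*hole_start = 0;
		return 0xffffffffU;	/* stand-in for EXT_MAX_BLOCKS */
	}
	if (lblk < ex_start) {
		/* hole runs from lblk up to the found extent */
		*hole_start = lblk;
		return ex_start - lblk;
	}
	/* lblk lies past the extent: hole runs up to the next allocation */
	*hole_start = ex_start + ex_len;
	return next_allocated - *hole_start;
}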
2272 :
2273 : /*
2274 : * ext4_ext_put_gap_in_cache:
2275 : * calculate boundaries of the gap that the requested block fits into
2276 : * and cache this gap
2277 : */
2278 : static void
2279 0 : ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,
2280 : ext4_lblk_t hole_len)
2281 : {
2282 0 : struct extent_status es;
2283 :
2284 0 : ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
2285 0 : hole_start + hole_len - 1, &es);
2286 0 : if (es.es_len) {
2287 : /* There's delayed extent containing lblock? */
2288 0 : if (es.es_lblk <= hole_start)
2289 0 : return;
2290 0 : hole_len = min(es.es_lblk - hole_start, hole_len);
2291 : }
2292 0 : ext_debug(inode, " -> %u:%u\n", hole_start, hole_len);
2293 0 : ext4_es_insert_extent(inode, hole_start, hole_len, ~0,
2294 : EXTENT_STATUS_HOLE);
2295 : }
2296 :
2297 : /*
2298 : * ext4_ext_rm_idx:
2299 : * removes index from the index block.
2300 : */
2301 0 : static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
2302 : struct ext4_ext_path *path, int depth)
2303 : {
2304 0 : int err;
2305 0 : ext4_fsblk_t leaf;
2306 :
2307 : /* free index block */
2308 0 : depth--;
2309 0 : path = path + depth;
2310 0 : leaf = ext4_idx_pblock(path->p_idx);
2311 0 : if (unlikely(path->p_hdr->eh_entries == 0)) {
2312 0 : EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
2313 0 : return -EFSCORRUPTED;
2314 : }
2315 0 : err = ext4_ext_get_access(handle, inode, path);
2316 0 : if (err)
2317 : return err;
2318 :
2319 0 : if (path->p_idx != EXT_LAST_INDEX(path->p_hdr)) {
2320 0 : int len = EXT_LAST_INDEX(path->p_hdr) - path->p_idx;
2321 0 : len *= sizeof(struct ext4_extent_idx);
2322 0 : memmove(path->p_idx, path->p_idx + 1, len);
2323 : }
2324 :
2325 0 : le16_add_cpu(&path->p_hdr->eh_entries, -1);
2326 0 : err = ext4_ext_dirty(handle, inode, path);
2327 0 : if (err)
2328 : return err;
2329 0 : ext_debug(inode, "index is empty, remove it, free block %llu\n", leaf);
2330 0 : trace_ext4_ext_rm_idx(inode, leaf);
2331 :
2332 0 : ext4_free_blocks(handle, inode, NULL, leaf, 1,
2333 : EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
2334 :
2335 0 : while (--depth >= 0) {
2336 0 : if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
2337 : break;
2338 0 : path--;
2339 0 : err = ext4_ext_get_access(handle, inode, path);
2340 0 : if (err)
2341 : break;
2342 0 : path->p_idx->ei_block = (path+1)->p_idx->ei_block;
2343 0 : err = ext4_ext_dirty(handle, inode, path);
2344 0 : if (err)
2345 : break;
2346 : }
2347 : return err;
2348 : }
2349 :
2350 : /*
2351 : * ext4_ext_calc_credits_for_single_extent:
2352 : * This routine returns the maximum credits needed to insert an extent
2353 : * into the extent tree.
2354 : * When passing the actual path, the caller should calculate credits
2355 : * under i_data_sem.
2356 : */
2357 0 : int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
2358 : struct ext4_ext_path *path)
2359 : {
2360 0 : if (path) {
2361 0 : int depth = ext_depth(inode);
2362 0 : int ret = 0;
2363 :
2364 : /* probably there is space in leaf? */
2365 0 : if (le16_to_cpu(path[depth].p_hdr->eh_entries)
2366 0 : < le16_to_cpu(path[depth].p_hdr->eh_max)) {
2367 :
2368 : /*
2369 : 			/*
2370 : 			 * There is some space in the leaf, so there is no
2371 : 			 * need to account for the leaf block credit.
2372 : 			 *
2373 : 			 * Bitmaps and block group descriptor blocks
2374 : 			 * and other metadata blocks still need to be
2375 : 			 * accounted for.
2375 : */
2376 : /* 1 bitmap, 1 block group descriptor */
2377 0 : ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
2378 0 : return ret;
2379 : }
2380 : }
2381 :
2382 0 : return ext4_chunk_trans_blocks(inode, nrblocks);
2383 : }
2384 :
2385 : /*
2386 : * How many index/leaf blocks need to change/allocate to add @extents extents?
2387 : *
2388 : * If we add a single extent, then in the worst case, each tree level's
2389 : * index/leaf needs to be changed in case the tree splits.
2390 : *
2391 : * If more extents are inserted, they could cause the whole tree split more
2392 : * than once, but this is really rare.
2393 : */
2394 0 : int ext4_ext_index_trans_blocks(struct inode *inode, int extents)
2395 : {
2396 0 : int index;
2397 0 : int depth;
2398 :
2399 : /* If we are converting the inline data, only one is needed here. */
2400 0 : if (ext4_has_inline_data(inode))
2401 : return 1;
2402 :
2403 0 : depth = ext_depth(inode);
2404 :
2405 0 : if (extents <= 1)
2406 0 : index = depth * 2;
2407 : else
2408 0 : index = depth * 3;
2409 :
2410 : return index;
2411 : }
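/*
 * Worked example (editor's addition): for a tree of depth 2, inserting a
 * single extent can in the worst case split every level once, touching
 * 2 * 2 = 4 index/leaf blocks; inserting several extents is budgeted at
 * 3 * 2 = 6 blocks to cover the (rare) possibility of the tree splitting
 * more than once.
 */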
2412 :
2413 0 : static inline int get_default_free_blocks_flags(struct inode *inode)
2414 : {
2415 0 : if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
2416 : ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE))
2417 : return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
2418 0 : else if (ext4_should_journal_data(inode))
2419 0 : return EXT4_FREE_BLOCKS_FORGET;
2420 : return 0;
2421 : }
2422 :
2423 : /*
2424 : * ext4_rereserve_cluster - increment the reserved cluster count when
2425 : * freeing a cluster with a pending reservation
2426 : *
2427 : * @inode - file containing the cluster
2428 : * @lblk - logical block in cluster to be reserved
2429 : *
2430 : * Increments the reserved cluster count and adjusts quota in a bigalloc
2431 : * file system when freeing a partial cluster containing at least one
2432 : * delayed and unwritten block. A partial cluster meeting that
2433 : * requirement will have a pending reservation. If so, the
2434 : * RERESERVE_CLUSTER flag is used when calling ext4_free_blocks() to
2435 : * defer reserved and allocated space accounting to a subsequent call
2436 : * to this function.
2437 : */
2438 0 : static void ext4_rereserve_cluster(struct inode *inode, ext4_lblk_t lblk)
2439 : {
2440 0 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2441 0 : struct ext4_inode_info *ei = EXT4_I(inode);
2442 :
2443 0 : dquot_reclaim_block(inode, EXT4_C2B(sbi, 1));
2444 :
2445 0 : spin_lock(&ei->i_block_reservation_lock);
2446 0 : ei->i_reserved_data_blocks++;
2447 0 : percpu_counter_add(&sbi->s_dirtyclusters_counter, 1);
2448 0 : spin_unlock(&ei->i_block_reservation_lock);
2449 :
2450 0 : percpu_counter_add(&sbi->s_freeclusters_counter, 1);
2451 0 : ext4_remove_pending(inode, lblk);
2452 0 : }
2453 :
2454 0 : static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2455 : struct ext4_extent *ex,
2456 : struct partial_cluster *partial,
2457 : ext4_lblk_t from, ext4_lblk_t to)
2458 : {
2459 0 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2460 0 : unsigned short ee_len = ext4_ext_get_actual_len(ex);
2461 0 : ext4_fsblk_t last_pblk, pblk;
2462 0 : ext4_lblk_t num;
2463 0 : int flags;
2464 :
2465 : /* only extent tail removal is allowed */
2466 0 : if (from < le32_to_cpu(ex->ee_block) ||
2467 0 : to != le32_to_cpu(ex->ee_block) + ee_len - 1) {
2468 0 : ext4_error(sbi->s_sb,
2469 : "strange request: removal(2) %u-%u from %u:%u",
2470 : from, to, le32_to_cpu(ex->ee_block), ee_len);
2471 0 : return 0;
2472 : }
2473 :
2474 : #ifdef EXTENTS_STATS
2475 : spin_lock(&sbi->s_ext_stats_lock);
2476 : sbi->s_ext_blocks += ee_len;
2477 : sbi->s_ext_extents++;
2478 : if (ee_len < sbi->s_ext_min)
2479 : sbi->s_ext_min = ee_len;
2480 : if (ee_len > sbi->s_ext_max)
2481 : sbi->s_ext_max = ee_len;
2482 : if (ext_depth(inode) > sbi->s_depth_max)
2483 : sbi->s_depth_max = ext_depth(inode);
2484 : spin_unlock(&sbi->s_ext_stats_lock);
2485 : #endif
2486 :
2487 0 : trace_ext4_remove_blocks(inode, ex, from, to, partial);
2488 :
2489 : /*
2490 : * if we have a partial cluster, and it's different from the
2491 : * cluster of the last block in the extent, we free it
2492 : */
2493 0 : last_pblk = ext4_ext_pblock(ex) + ee_len - 1;
2494 :
2495 0 : if (partial->state != initial &&
2496 0 : partial->pclu != EXT4_B2C(sbi, last_pblk)) {
2497 0 : if (partial->state == tofree) {
2498 0 : flags = get_default_free_blocks_flags(inode);
2499 0 : if (ext4_is_pending(inode, partial->lblk))
2500 0 : flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
2501 0 : ext4_free_blocks(handle, inode, NULL,
2502 0 : EXT4_C2B(sbi, partial->pclu),
2503 0 : sbi->s_cluster_ratio, flags);
2504 0 : if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
2505 0 : ext4_rereserve_cluster(inode, partial->lblk);
2506 : }
2507 0 : partial->state = initial;
2508 : }
2509 :
2510 0 : num = le32_to_cpu(ex->ee_block) + ee_len - from;
2511 0 : pblk = ext4_ext_pblock(ex) + ee_len - num;
2512 :
2513 : /*
2514 : * We free the partial cluster at the end of the extent (if any),
2515 : * unless the cluster is used by another extent (partial_cluster
2516 : * state is nofree). If a partial cluster exists here, it must be
2517 : * shared with the last block in the extent.
2518 : */
2519 0 : flags = get_default_free_blocks_flags(inode);
2520 :
2521 : /* partial, left end cluster aligned, right end unaligned */
2522 0 : if ((EXT4_LBLK_COFF(sbi, to) != sbi->s_cluster_ratio - 1) &&
2523 0 : (EXT4_LBLK_CMASK(sbi, to) >= from) &&
2524 0 : (partial->state != nofree)) {
2525 0 : if (ext4_is_pending(inode, to))
2526 0 : flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
2527 0 : ext4_free_blocks(handle, inode, NULL,
2528 0 : EXT4_PBLK_CMASK(sbi, last_pblk),
2529 : sbi->s_cluster_ratio, flags);
2530 0 : if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
2531 0 : ext4_rereserve_cluster(inode, to);
2532 0 : partial->state = initial;
2533 0 : flags = get_default_free_blocks_flags(inode);
2534 : }
2535 :
2536 0 : flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
2537 :
2538 : /*
2539 : * For bigalloc file systems, we never free a partial cluster
2540 : * at the beginning of the extent. Instead, we check to see if we
2541 : * need to free it on a subsequent call to ext4_remove_blocks,
2542 : * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space.
2543 : */
2544 0 : flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
2545 0 : ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
2546 :
2547 : /* reset the partial cluster if we've freed past it */
2548 0 : if (partial->state != initial && partial->pclu != EXT4_B2C(sbi, pblk))
2549 0 : partial->state = initial;
2550 :
2551 : /*
2552 : * If we've freed the entire extent but the beginning is not left
2553 : * cluster aligned and is not marked as ineligible for freeing we
2554 : * record the partial cluster at the beginning of the extent. It
2555 : * wasn't freed by the preceding ext4_free_blocks() call, and we
2556 : * need to look farther to the left to determine if it's to be freed
2557 : * (not shared with another extent). Else, reset the partial
2558 : * cluster - we're either done freeing or the beginning of the
2559 : * extent is left cluster aligned.
2560 : */
2561 0 : if (EXT4_LBLK_COFF(sbi, from) && num == ee_len) {
2562 0 : if (partial->state == initial) {
2563 0 : partial->pclu = EXT4_B2C(sbi, pblk);
2564 0 : partial->lblk = from;
2565 0 : partial->state = tofree;
2566 : }
2567 : } else {
2568 0 : partial->state = initial;
2569 : }
2570 :
2571 : return 0;
2572 : }
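/*
 * Worked example (editor's addition): with a bigalloc cluster of 16 blocks,
 * removing the whole of an extent covering logical blocks 20..37 gives
 * EXT4_LBLK_COFF(sbi, 37) = 5 (the right end is not cluster-aligned) and
 * EXT4_LBLK_CMASK(sbi, 37) = 32 >= 20, so the cluster holding the last
 * removed block is freed whole above (unless the partial state is nofree).
 * The cluster containing block 20 is only recorded as a "tofree" partial
 * cluster, leaving ext4_ext_rm_leaf()/ext4_ext_remove_space() to decide
 * later whether it is shared with another extent.
 */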
2573 :
2574 : /*
2575 : * ext4_ext_rm_leaf() Removes the extents associated with the
2576 : * blocks appearing between "start" and "end". Both "start"
2577 : * and "end" must appear in the same extent or EIO is returned.
2578 : *
2579 : * @handle: The journal handle
2580 : * @inode: The files inode
2581 : * @inode: The file's inode
2582 : * @partial_cluster: The cluster which we'll have to free if all extents
2583 : * @partial: The partial cluster which we'll have to free if all extents
2584 : * have been released from it. However, if its state is nofree,
2585 : * it's a cluster just to the right of the
2586 : * punched region and it must not be freed.
2587 : * @end: The last block to remove
2588 : */
2589 : static int
2590 0 : ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2591 : struct ext4_ext_path *path,
2592 : struct partial_cluster *partial,
2593 : ext4_lblk_t start, ext4_lblk_t end)
2594 : {
2595 0 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2596 0 : int err = 0, correct_index = 0;
2597 0 : int depth = ext_depth(inode), credits, revoke_credits;
2598 0 : struct ext4_extent_header *eh;
2599 0 : ext4_lblk_t a, b;
2600 0 : unsigned num;
2601 0 : ext4_lblk_t ex_ee_block;
2602 0 : unsigned short ex_ee_len;
2603 0 : unsigned unwritten = 0;
2604 0 : struct ext4_extent *ex;
2605 0 : ext4_fsblk_t pblk;
2606 :
2607 : /* the header must be checked already in ext4_ext_remove_space() */
2608 0 : ext_debug(inode, "truncate since %u in leaf to %u\n", start, end);
2609 0 : if (!path[depth].p_hdr)
2610 0 : path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
2611 0 : eh = path[depth].p_hdr;
2612 0 : if (unlikely(path[depth].p_hdr == NULL)) {
2613 0 : EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
2614 0 : return -EFSCORRUPTED;
2615 : }
2616 : /* find where to start removing */
2617 0 : ex = path[depth].p_ext;
2618 0 : if (!ex)
2619 0 : ex = EXT_LAST_EXTENT(eh);
2620 :
2621 0 : ex_ee_block = le32_to_cpu(ex->ee_block);
2622 0 : ex_ee_len = ext4_ext_get_actual_len(ex);
2623 :
2624 0 : trace_ext4_ext_rm_leaf(inode, start, ex, partial);
2625 :
2626 0 : while (ex >= EXT_FIRST_EXTENT(eh) &&
2627 0 : ex_ee_block + ex_ee_len > start) {
2628 :
2629 0 : if (ext4_ext_is_unwritten(ex))
2630 : unwritten = 1;
2631 : else
2632 0 : unwritten = 0;
2633 :
2634 0 : ext_debug(inode, "remove ext %u:[%d]%d\n", ex_ee_block,
2635 : unwritten, ex_ee_len);
2636 0 : path[depth].p_ext = ex;
2637 :
2638 0 : a = max(ex_ee_block, start);
2639 0 : b = min(ex_ee_block + ex_ee_len - 1, end);
2640 :
2641 0 : ext_debug(inode, " border %u:%u\n", a, b);
2642 :
2643 : /* If this extent is beyond the end of the hole, skip it */
2644 0 : if (end < ex_ee_block) {
2645 : /*
2646 : * We're going to skip this extent and move to another,
2647 : * so note that its first cluster is in use to avoid
2648 : * freeing it when removing blocks. Eventually, the
2649 : * right edge of the truncated/punched region will
2650 : * be just to the left.
2651 : */
2652 0 : if (sbi->s_cluster_ratio > 1) {
2653 0 : pblk = ext4_ext_pblock(ex);
2654 0 : partial->pclu = EXT4_B2C(sbi, pblk);
2655 0 : partial->state = nofree;
2656 : }
2657 0 : ex--;
2658 0 : ex_ee_block = le32_to_cpu(ex->ee_block);
2659 0 : ex_ee_len = ext4_ext_get_actual_len(ex);
2660 0 : continue;
2661 0 : } else if (b != ex_ee_block + ex_ee_len - 1) {
2662 0 : EXT4_ERROR_INODE(inode,
2663 : "can not handle truncate %u:%u "
2664 : "on extent %u:%u",
2665 : start, end, ex_ee_block,
2666 : ex_ee_block + ex_ee_len - 1);
2667 0 : err = -EFSCORRUPTED;
2668 0 : goto out;
2669 0 : } else if (a != ex_ee_block) {
2670 : /* remove tail of the extent */
2671 0 : num = a - ex_ee_block;
2672 : } else {
2673 : /* remove whole extent: excellent! */
2674 : num = 0;
2675 : }
2676 : /*
2677 : * 3 for leaf, sb, and inode plus 2 (bmap and group
2678 : * descriptor) for each block group; assume two block
2679 : * groups plus ex_ee_len/blocks_per_block_group for
2680 : * the worst case
2681 : */
2682 0 : credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
2683 0 : if (ex == EXT_FIRST_EXTENT(eh)) {
2684 0 : correct_index = 1;
2685 0 : credits += (ext_depth(inode)) + 1;
2686 : }
2687 0 : credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
2688 : /*
2689 : * We may end up freeing some index blocks and data from the
2690 : * punched range. Note that partial clusters are accounted for
2691 : * by ext4_free_data_revoke_credits().
2692 : */
2693 0 : revoke_credits =
2694 : ext4_free_metadata_revoke_credits(inode->i_sb,
2695 : ext_depth(inode)) +
2696 0 : ext4_free_data_revoke_credits(inode, b - a + 1);
2697 :
2698 0 : err = ext4_datasem_ensure_credits(handle, inode, credits,
2699 : credits, revoke_credits);
2700 0 : if (err) {
2701 0 : if (err > 0)
2702 0 : err = -EAGAIN;
2703 0 : goto out;
2704 : }
2705 :
2706 0 : err = ext4_ext_get_access(handle, inode, path + depth);
2707 0 : if (err)
2708 0 : goto out;
2709 :
2710 0 : err = ext4_remove_blocks(handle, inode, ex, partial, a, b);
2711 0 : if (err)
2712 0 : goto out;
2713 :
2714 0 : if (num == 0)
2715 : /* this extent is removed; mark slot entirely unused */
2716 0 : ext4_ext_store_pblock(ex, 0);
2717 :
2718 0 : ex->ee_len = cpu_to_le16(num);
2719 : /*
2720 : * Do not mark unwritten if all the blocks in the
2721 : * extent have been removed.
2722 : */
2723 0 : if (unwritten && num)
2724 0 : ext4_ext_mark_unwritten(ex);
2725 : /*
2726 : * If the extent was completely released,
2727 : * we need to remove it from the leaf
2728 : */
2729 0 : if (num == 0) {
2730 0 : if (end != EXT_MAX_BLOCKS - 1) {
2731 : /*
2732 : * For hole punching, we need to scoot all the
2733 : * extents up when an extent is removed so that
2734 : 				 * we don't have blank extents in the middle
2735 : */
2736 0 : memmove(ex, ex+1, (EXT_LAST_EXTENT(eh) - ex) *
2737 : sizeof(struct ext4_extent));
2738 :
2739 : /* Now get rid of the one at the end */
2740 0 : memset(EXT_LAST_EXTENT(eh), 0,
2741 : sizeof(struct ext4_extent));
2742 : }
2743 0 : le16_add_cpu(&eh->eh_entries, -1);
2744 : }
2745 :
2746 0 : err = ext4_ext_dirty(handle, inode, path + depth);
2747 0 : if (err)
2748 0 : goto out;
2749 :
2750 0 : ext_debug(inode, "new extent: %u:%u:%llu\n", ex_ee_block, num,
2751 : ext4_ext_pblock(ex));
2752 0 : ex--;
2753 0 : ex_ee_block = le32_to_cpu(ex->ee_block);
2754 0 : ex_ee_len = ext4_ext_get_actual_len(ex);
2755 : }
2756 :
2757 0 : if (correct_index && eh->eh_entries)
2758 0 : err = ext4_ext_correct_indexes(handle, inode, path);
2759 :
2760 : /*
2761 : * If there's a partial cluster and at least one extent remains in
2762 : * the leaf, free the partial cluster if it isn't shared with the
2763 : * current extent. If it is shared with the current extent
2764 : * we reset the partial cluster because we've reached the start of the
2765 : * truncated/punched region and we're done removing blocks.
2766 : */
2767 0 : if (partial->state == tofree && ex >= EXT_FIRST_EXTENT(eh)) {
2768 0 : pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
2769 0 : if (partial->pclu != EXT4_B2C(sbi, pblk)) {
2770 0 : int flags = get_default_free_blocks_flags(inode);
2771 :
2772 0 : if (ext4_is_pending(inode, partial->lblk))
2773 0 : flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
2774 0 : ext4_free_blocks(handle, inode, NULL,
2775 0 : EXT4_C2B(sbi, partial->pclu),
2776 0 : sbi->s_cluster_ratio, flags);
2777 0 : if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
2778 0 : ext4_rereserve_cluster(inode, partial->lblk);
2779 : }
2780 0 : partial->state = initial;
2781 : }
2782 :
2783 : /* if this leaf is free, then we should
2784 : * remove it from index block above */
2785 0 : if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
2786 0 : err = ext4_ext_rm_idx(handle, inode, path, depth);
2787 :
2788 0 : out:
2789 : return err;
2790 : }
2791 :
2792 : /*
2793 : * ext4_ext_more_to_rm:
2794 : * returns 1 if current index has to be freed (even partial)
2795 : */
2796 : static int
2797 0 : ext4_ext_more_to_rm(struct ext4_ext_path *path)
2798 : {
2799 0 : BUG_ON(path->p_idx == NULL);
2800 :
2801 0 : if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
2802 : return 0;
2803 :
2804 : /*
2805 : * if a truncate on a deeper level happened, it wasn't partial,
2806 : * so we have to consider the current index for truncation
2807 : */
2808 0 : if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block)
2809 0 : return 0;
2810 : return 1;
2811 : }
2812 :
2813 0 : int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
2814 : ext4_lblk_t end)
2815 : {
2816 0 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2817 0 : int depth = ext_depth(inode);
2818 0 : struct ext4_ext_path *path = NULL;
2819 0 : struct partial_cluster partial;
2820 0 : handle_t *handle;
2821 0 : int i = 0, err = 0;
2822 :
2823 0 : partial.pclu = 0;
2824 0 : partial.lblk = 0;
2825 0 : partial.state = initial;
2826 :
2827 0 : ext_debug(inode, "truncate since %u to %u\n", start, end);
2828 :
2829 : /* probably first extent we're gonna free will be last in block */
2830 0 : handle = ext4_journal_start_with_revoke(inode, EXT4_HT_TRUNCATE,
2831 : depth + 1,
2832 : ext4_free_metadata_revoke_credits(inode->i_sb, depth));
2833 0 : if (IS_ERR(handle))
2834 0 : return PTR_ERR(handle);
2835 :
2836 0 : again:
2837 0 : trace_ext4_ext_remove_space(inode, start, end, depth);
2838 :
2839 : /*
2840 : * Check if we are removing extents inside the extent tree. If that
2841 : * is the case, we are going to punch a hole inside the extent tree
2842 : * so we have to check whether we need to split the extent covering
2843 : * the last block to remove so we can easily remove the part of it
2844 : * in ext4_ext_rm_leaf().
2845 : */
2846 0 : if (end < EXT_MAX_BLOCKS - 1) {
2847 0 : struct ext4_extent *ex;
2848 0 : ext4_lblk_t ee_block, ex_end, lblk;
2849 0 : ext4_fsblk_t pblk;
2850 :
2851 : /* find extent for or closest extent to this block */
2852 0 : path = ext4_find_extent(inode, end, NULL,
2853 : EXT4_EX_NOCACHE | EXT4_EX_NOFAIL);
2854 0 : if (IS_ERR(path)) {
2855 0 : ext4_journal_stop(handle);
2856 0 : return PTR_ERR(path);
2857 : }
2858 0 : depth = ext_depth(inode);
2859 : /* Leaf not may not exist only if inode has no blocks at all */
2860 0 : ex = path[depth].p_ext;
2861 0 : if (!ex) {
2862 0 : if (depth) {
2863 0 : EXT4_ERROR_INODE(inode,
2864 : "path[%d].p_hdr == NULL",
2865 : depth);
2866 0 : err = -EFSCORRUPTED;
2867 : }
2868 0 : goto out;
2869 : }
2870 :
2871 0 : ee_block = le32_to_cpu(ex->ee_block);
2872 0 : ex_end = ee_block + ext4_ext_get_actual_len(ex) - 1;
2873 :
2874 : /*
2875 : * See if the last block is inside the extent, if so split
2876 : * the extent at 'end' block so we can easily remove the
2877 : * tail of the first part of the split extent in
2878 : * ext4_ext_rm_leaf().
2879 : */
2880 0 : if (end >= ee_block && end < ex_end) {
2881 :
2882 : /*
2883 : * If we're going to split the extent, note that
2884 : * the cluster containing the block after 'end' is
2885 : * in use to avoid freeing it when removing blocks.
2886 : */
2887 0 : if (sbi->s_cluster_ratio > 1) {
2888 0 : pblk = ext4_ext_pblock(ex) + end - ee_block + 1;
2889 0 : partial.pclu = EXT4_B2C(sbi, pblk);
2890 0 : partial.state = nofree;
2891 : }
2892 :
2893 : /*
2894 : * Split the extent in two so that 'end' is the last
2895 : * block in the first new extent. Also we should not
2896 : * fail removing space due to ENOSPC so try to use
2897 : * reserved block if that happens.
2898 : */
2899 0 : err = ext4_force_split_extent_at(handle, inode, &path,
2900 : end + 1, 1);
2901 0 : if (err < 0)
2902 0 : goto out;
2903 :
2904 0 : } else if (sbi->s_cluster_ratio > 1 && end >= ex_end &&
2905 0 : partial.state == initial) {
2906 : /*
2907 : * If we're punching, there's an extent to the right.
2908 : * If the partial cluster hasn't been set, set it to
2909 : * that extent's first cluster and its state to nofree
2910 : * so it won't be freed should it contain blocks to be
2911 : * removed. If it's already set (tofree/nofree), we're
2912 : * retrying and keep the original partial cluster info
2913 : * so a cluster marked tofree as a result of earlier
2914 : * extent removal is not lost.
2915 : */
2916 0 : lblk = ex_end + 1;
2917 0 : err = ext4_ext_search_right(inode, path, &lblk, &pblk,
2918 : NULL);
2919 0 : if (err < 0)
2920 0 : goto out;
2921 0 : if (pblk) {
2922 0 : partial.pclu = EXT4_B2C(sbi, pblk);
2923 0 : partial.state = nofree;
2924 : }
2925 : }
2926 : }
2927 : /*
2928 : 	 * We start scanning from the right side, freeing all the blocks
2929 : * after i_size and walking into the tree depth-wise.
2930 : */
2931 0 : depth = ext_depth(inode);
2932 0 : if (path) {
2933 : int k = i = depth;
2934 0 : while (--k > 0)
2935 0 : path[k].p_block =
2936 0 : le16_to_cpu(path[k].p_hdr->eh_entries)+1;
2937 : } else {
2938 0 : path = kcalloc(depth + 1, sizeof(struct ext4_ext_path),
2939 : GFP_NOFS | __GFP_NOFAIL);
2940 0 : if (path == NULL) {
2941 0 : ext4_journal_stop(handle);
2942 0 : return -ENOMEM;
2943 : }
2944 0 : path[0].p_maxdepth = path[0].p_depth = depth;
2945 0 : path[0].p_hdr = ext_inode_hdr(inode);
2946 0 : i = 0;
2947 :
2948 0 : if (ext4_ext_check(inode, path[0].p_hdr, depth, 0)) {
2949 0 : err = -EFSCORRUPTED;
2950 0 : goto out;
2951 : }
2952 : }
2953 : err = 0;
2954 :
2955 0 : while (i >= 0 && err == 0) {
2956 0 : if (i == depth) {
2957 : /* this is leaf block */
2958 0 : err = ext4_ext_rm_leaf(handle, inode, path,
2959 : &partial, start, end);
2960 : /* root level has p_bh == NULL, brelse() eats this */
2961 0 : brelse(path[i].p_bh);
2962 0 : path[i].p_bh = NULL;
2963 0 : i--;
2964 0 : continue;
2965 : }
2966 :
2967 : /* this is index block */
2968 0 : if (!path[i].p_hdr) {
2969 0 : ext_debug(inode, "initialize header\n");
2970 0 : path[i].p_hdr = ext_block_hdr(path[i].p_bh);
2971 : }
2972 :
2973 0 : if (!path[i].p_idx) {
2974 : /* this level hasn't been touched yet */
2975 0 : path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
2976 0 : path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1;
2977 0 : ext_debug(inode, "init index ptr: hdr 0x%p, num %d\n",
2978 : path[i].p_hdr,
2979 : le16_to_cpu(path[i].p_hdr->eh_entries));
2980 : } else {
2981 : /* we were already here, see at next index */
2982 0 : path[i].p_idx--;
2983 : }
2984 :
2985 0 : ext_debug(inode, "level %d - index, first 0x%p, cur 0x%p\n",
2986 : i, EXT_FIRST_INDEX(path[i].p_hdr),
2987 : path[i].p_idx);
2988 0 : if (ext4_ext_more_to_rm(path + i)) {
2989 0 : struct buffer_head *bh;
2990 : /* go to the next level */
2991 0 : ext_debug(inode, "move to level %d (block %llu)\n",
2992 : i + 1, ext4_idx_pblock(path[i].p_idx));
2993 0 : memset(path + i + 1, 0, sizeof(*path));
2994 0 : bh = read_extent_tree_block(inode, path[i].p_idx,
2995 : depth - i - 1,
2996 : EXT4_EX_NOCACHE);
2997 0 : if (IS_ERR(bh)) {
2998 : /* should we reset i_size? */
2999 0 : err = PTR_ERR(bh);
3000 0 : break;
3001 : }
3002 : /* Yield here to deal with large extent trees.
3003 : * Should be a no-op if we did IO above. */
3004 0 : cond_resched();
3005 0 : if (WARN_ON(i + 1 > depth)) {
3006 : err = -EFSCORRUPTED;
3007 : break;
3008 : }
3009 0 : path[i + 1].p_bh = bh;
3010 :
3011 : /* save actual number of indexes since this
3012 : * number is changed at the next iteration */
3013 0 : path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
3014 0 : i++;
3015 : } else {
3016 : /* we finished processing this index, go up */
3017 0 : if (path[i].p_hdr->eh_entries == 0 && i > 0) {
3018 : /* index is empty, remove it;
3019 : 				 * the handle must already have been prepared by
3020 : 				 * the leaf truncation above */
3021 0 : err = ext4_ext_rm_idx(handle, inode, path, i);
3022 : }
3023 : /* root level has p_bh == NULL, brelse() eats this */
3024 0 : brelse(path[i].p_bh);
3025 0 : path[i].p_bh = NULL;
3026 0 : i--;
3027 0 : ext_debug(inode, "return to level %d\n", i);
3028 : }
3029 : }
3030 :
3031 0 : trace_ext4_ext_remove_space_done(inode, start, end, depth, &partial,
3032 0 : path->p_hdr->eh_entries);
3033 :
3034 : /*
3035 : * if there's a partial cluster and we have removed the first extent
3036 : * in the file, then we also free the partial cluster, if any
3037 : */
3038 0 : if (partial.state == tofree && err == 0) {
3039 0 : int flags = get_default_free_blocks_flags(inode);
3040 :
3041 0 : if (ext4_is_pending(inode, partial.lblk))
3042 0 : flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
3043 0 : ext4_free_blocks(handle, inode, NULL,
3044 0 : EXT4_C2B(sbi, partial.pclu),
3045 0 : sbi->s_cluster_ratio, flags);
3046 0 : if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
3047 0 : ext4_rereserve_cluster(inode, partial.lblk);
3048 0 : partial.state = initial;
3049 : }
3050 :
3051 : /* TODO: flexible tree reduction should be here */
3052 0 : if (path->p_hdr->eh_entries == 0) {
3053 : /*
3054 : * truncate to zero freed all the tree,
3055 : * so we need to correct eh_depth
3056 : */
3057 0 : err = ext4_ext_get_access(handle, inode, path);
3058 0 : if (err == 0) {
3059 0 : ext_inode_hdr(inode)->eh_depth = 0;
3060 0 : ext_inode_hdr(inode)->eh_max =
3061 : cpu_to_le16(ext4_ext_space_root(inode, 0));
3062 0 : err = ext4_ext_dirty(handle, inode, path);
3063 : }
3064 : }
3065 0 : out:
3066 0 : ext4_free_ext_path(path);
3067 0 : path = NULL;
3068 0 : if (err == -EAGAIN)
3069 0 : goto again;
3070 0 : ext4_journal_stop(handle);
3071 :
3072 0 : return err;
3073 : }
3074 :
3075 : /*
3076 : * called at mount time
3077 : */
3078 0 : void ext4_ext_init(struct super_block *sb)
3079 : {
3080 : /*
3081 : * possible initialization would be here
3082 : */
3083 :
3084 0 : if (ext4_has_feature_extents(sb)) {
3085 : #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
3086 : printk(KERN_INFO "EXT4-fs: file extents enabled"
3087 : #ifdef AGGRESSIVE_TEST
3088 : ", aggressive tests"
3089 : #endif
3090 : #ifdef CHECK_BINSEARCH
3091 : ", check binsearch"
3092 : #endif
3093 : #ifdef EXTENTS_STATS
3094 : ", stats"
3095 : #endif
3096 : "\n");
3097 : #endif
3098 : #ifdef EXTENTS_STATS
3099 : spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock);
3100 : EXT4_SB(sb)->s_ext_min = 1 << 30;
3101 : EXT4_SB(sb)->s_ext_max = 0;
3102 : #endif
3103 0 : }
3104 0 : }
3105 :
3106 : /*
3107 : * called at umount time
3108 : */
3109 0 : void ext4_ext_release(struct super_block *sb)
3110 : {
3111 0 : if (!ext4_has_feature_extents(sb))
3112 : return;
3113 :
3114 : #ifdef EXTENTS_STATS
3115 : if (EXT4_SB(sb)->s_ext_blocks && EXT4_SB(sb)->s_ext_extents) {
3116 : struct ext4_sb_info *sbi = EXT4_SB(sb);
3117 : printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n",
3118 : sbi->s_ext_blocks, sbi->s_ext_extents,
3119 : sbi->s_ext_blocks / sbi->s_ext_extents);
3120 : printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n",
3121 : sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max);
3122 : }
3123 : #endif
3124 : }
3125 :
3126 0 : static void ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
3127 : {
3128 0 : ext4_lblk_t ee_block;
3129 0 : ext4_fsblk_t ee_pblock;
3130 0 : unsigned int ee_len;
3131 :
3132 0 : ee_block = le32_to_cpu(ex->ee_block);
3133 0 : ee_len = ext4_ext_get_actual_len(ex);
3134 0 : ee_pblock = ext4_ext_pblock(ex);
3135 :
3136 0 : if (ee_len == 0)
3137 : return;
3138 :
3139 0 : ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
3140 : EXTENT_STATUS_WRITTEN);
3141 : }
3142 :
3143 : /* FIXME!! we need to try to merge to left or right after zero-out */
3144 0 : static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
3145 : {
3146 0 : ext4_fsblk_t ee_pblock;
3147 0 : unsigned int ee_len;
3148 :
3149 0 : ee_len = ext4_ext_get_actual_len(ex);
3150 0 : ee_pblock = ext4_ext_pblock(ex);
3151 0 : return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock,
3152 : ee_len);
3153 : }
3154 :
3155 : /*
3156 : * ext4_split_extent_at() splits an extent at given block.
3157 : *
3158 : * @handle: the journal handle
3159 : * @inode: the file inode
3160 : * @path: the path to the extent
3161 : * @split: the logical block where the extent is split.
3162 : * @split_flags: indicates if the extent could be zeroed out if the split fails,
3163 : * and the states (initialized or unwritten) of the new extents.
3164 : * @flags: flags used to insert new extent to extent tree.
3165 : *
3166 : *
3167 : * Splits extent [a, b] into two extents [a, @split) and [@split, b], states
3168 : * of which are determined by split_flag.
3169 : *
3170 : * There are two cases:
3171 : * a> the extent is split into two extents.
3172 : * b> no split is needed, just mark the extent.
3173 : *
3174 : * return 0 on success.
3175 : */
3176 0 : static int ext4_split_extent_at(handle_t *handle,
3177 : struct inode *inode,
3178 : struct ext4_ext_path **ppath,
3179 : ext4_lblk_t split,
3180 : int split_flag,
3181 : int flags)
3182 : {
3183 0 : struct ext4_ext_path *path = *ppath;
3184 0 : ext4_fsblk_t newblock;
3185 0 : ext4_lblk_t ee_block;
3186 0 : struct ext4_extent *ex, newex, orig_ex, zero_ex;
3187 0 : struct ext4_extent *ex2 = NULL;
3188 0 : unsigned int ee_len, depth;
3189 0 : int err = 0;
3190 :
3191 0 : BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
3192 : (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
3193 :
3194 0 : ext_debug(inode, "logical block %llu\n", (unsigned long long)split);
3195 :
3196 0 : ext4_ext_show_leaf(inode, path);
3197 :
3198 0 : depth = ext_depth(inode);
3199 0 : ex = path[depth].p_ext;
3200 0 : ee_block = le32_to_cpu(ex->ee_block);
3201 0 : ee_len = ext4_ext_get_actual_len(ex);
3202 0 : newblock = split - ee_block + ext4_ext_pblock(ex);
3203 :
3204 0 : BUG_ON(split < ee_block || split >= (ee_block + ee_len));
3205 0 : BUG_ON(!ext4_ext_is_unwritten(ex) &&
3206 : split_flag & (EXT4_EXT_MAY_ZEROOUT |
3207 : EXT4_EXT_MARK_UNWRIT1 |
3208 : EXT4_EXT_MARK_UNWRIT2));
3209 :
3210 0 : err = ext4_ext_get_access(handle, inode, path + depth);
3211 0 : if (err)
3212 0 : goto out;
3213 :
3214 0 : if (split == ee_block) {
3215 : /*
3216 : * case b: block @split is the block that the extent begins with
3217 : * then we just change the state of the extent, and splitting
3218 : * is not needed.
3219 : */
3220 0 : if (split_flag & EXT4_EXT_MARK_UNWRIT2)
3221 0 : ext4_ext_mark_unwritten(ex);
3222 : else
3223 0 : ext4_ext_mark_initialized(ex);
3224 :
3225 0 : if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
3226 0 : ext4_ext_try_to_merge(handle, inode, path, ex);
3227 :
3228 0 : err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3229 0 : goto out;
3230 : }
3231 :
3232 : /* case a */
3233 0 : memcpy(&orig_ex, ex, sizeof(orig_ex));
3234 0 : ex->ee_len = cpu_to_le16(split - ee_block);
3235 0 : if (split_flag & EXT4_EXT_MARK_UNWRIT1)
3236 0 : ext4_ext_mark_unwritten(ex);
3237 :
3238 : /*
3239 : 	 * the path may lead to a new leaf, not to the original leaf any more,
3240 : 	 * after ext4_ext_insert_extent() returns.
3241 : */
3242 0 : err = ext4_ext_dirty(handle, inode, path + depth);
3243 0 : if (err)
3244 0 : goto fix_extent_len;
3245 :
3246 0 : ex2 = &newex;
3247 0 : ex2->ee_block = cpu_to_le32(split);
3248 0 : ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block));
3249 0 : ext4_ext_store_pblock(ex2, newblock);
3250 0 : if (split_flag & EXT4_EXT_MARK_UNWRIT2)
3251 0 : ext4_ext_mark_unwritten(ex2);
3252 :
3253 0 : err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
3254 0 : if (err != -ENOSPC && err != -EDQUOT && err != -ENOMEM)
3255 0 : goto out;
3256 :
3257 0 : if (EXT4_EXT_MAY_ZEROOUT & split_flag) {
3258 0 : if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
3259 0 : if (split_flag & EXT4_EXT_DATA_VALID1) {
3260 0 : err = ext4_ext_zeroout(inode, ex2);
3261 0 : zero_ex.ee_block = ex2->ee_block;
3262 0 : zero_ex.ee_len = cpu_to_le16(
3263 : ext4_ext_get_actual_len(ex2));
3264 0 : ext4_ext_store_pblock(&zero_ex,
3265 : ext4_ext_pblock(ex2));
3266 : } else {
3267 0 : err = ext4_ext_zeroout(inode, ex);
3268 0 : zero_ex.ee_block = ex->ee_block;
3269 0 : zero_ex.ee_len = cpu_to_le16(
3270 : ext4_ext_get_actual_len(ex));
3271 0 : ext4_ext_store_pblock(&zero_ex,
3272 : ext4_ext_pblock(ex));
3273 : }
3274 : } else {
3275 0 : err = ext4_ext_zeroout(inode, &orig_ex);
3276 0 : zero_ex.ee_block = orig_ex.ee_block;
3277 0 : zero_ex.ee_len = cpu_to_le16(
3278 : ext4_ext_get_actual_len(&orig_ex));
3279 0 : ext4_ext_store_pblock(&zero_ex,
3280 : ext4_ext_pblock(&orig_ex));
3281 : }
3282 :
3283 0 : if (!err) {
3284 : /* update the extent length and mark as initialized */
3285 0 : ex->ee_len = cpu_to_le16(ee_len);
3286 0 : ext4_ext_try_to_merge(handle, inode, path, ex);
3287 0 : err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3288 0 : if (!err)
3289 : /* update extent status tree */
3290 0 : ext4_zeroout_es(inode, &zero_ex);
3291 : /* If we failed at this point, we don't know in which
3292 : 			 * state exactly the extent tree is, so don't try to fix the
3293 : 			 * length of the original extent as that may do even more
3294 : * damage.
3295 : */
3296 0 : goto out;
3297 : }
3298 : }
3299 :
3300 0 : fix_extent_len:
3301 0 : ex->ee_len = orig_ex.ee_len;
3302 : /*
3303 : * Ignore ext4_ext_dirty return value since we are already in error path
3304 : * and err is a non-zero error code.
3305 : */
3306 0 : ext4_ext_dirty(handle, inode, path + path->p_depth);
3307 0 : return err;
3308 : out:
3309 : ext4_ext_show_leaf(inode, path);
3310 : return err;
3311 : }
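/*
 * Illustrative sketch (editor's addition, not part of ext4): the case-a
 * split performed above, in a simplified standalone form. An extent covering
 * logical blocks [lblk, lblk + len) backed by physical blocks starting at
 * pblk is cut at "split" into a first half [lblk, split) keeping pblk and a
 * second half [split, lblk + len) starting at pblk + (split - lblk). The
 * struct below is a hypothetical stand-in for a pair of struct ext4_extent.
 */
struct demo_split {
	unsigned int lblk1, len1;
	unsigned long long pblk1;
	unsigned int lblk2, len2;
	unsigned long long pblk2;
};

static struct demo_split demo_split_at(unsigned int lblk, unsigned int len,
				       unsigned long long pblk,
				       unsigned int split)
{
	struct demo_split s;

	/* caller guarantees lblk < split < lblk + len */
	s.lblk1 = lblk;
	s.len1 = split - lblk;
	s.pblk1 = pblk;

	s.lblk2 = split;
	s.len2 = len - s.len1;
	s.pblk2 = pblk + s.len1;
	return s;
}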
3312 :
3313 : /*
3314 : * ext4_split_extent() splits an extent and marks the extent which is covered
3315 : * by @map as split_flag indicates
3316 : *
3317 : * It may result in splitting the extent into multiple extents (up to three)
3318 : * There are three possibilities:
3319 : * a> There is no split required
3320 : * b> Splits into two extents: the split happens at either end of the extent
3321 : * c> Splits into three extents: someone is splitting in the middle of the extent
3322 : *
3323 : */
3324 0 : static int ext4_split_extent(handle_t *handle,
3325 : struct inode *inode,
3326 : struct ext4_ext_path **ppath,
3327 : struct ext4_map_blocks *map,
3328 : int split_flag,
3329 : int flags)
3330 : {
3331 0 : struct ext4_ext_path *path = *ppath;
3332 0 : ext4_lblk_t ee_block;
3333 0 : struct ext4_extent *ex;
3334 0 : unsigned int ee_len, depth;
3335 0 : int err = 0;
3336 0 : int unwritten;
3337 0 : int split_flag1, flags1;
3338 0 : int allocated = map->m_len;
3339 :
3340 0 : depth = ext_depth(inode);
3341 0 : ex = path[depth].p_ext;
3342 0 : ee_block = le32_to_cpu(ex->ee_block);
3343 0 : ee_len = ext4_ext_get_actual_len(ex);
3344 0 : unwritten = ext4_ext_is_unwritten(ex);
3345 :
3346 0 : if (map->m_lblk + map->m_len < ee_block + ee_len) {
3347 0 : split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
3348 0 : flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
3349 0 : if (unwritten)
3350 0 : split_flag1 |= EXT4_EXT_MARK_UNWRIT1 |
3351 : EXT4_EXT_MARK_UNWRIT2;
3352 0 : if (split_flag & EXT4_EXT_DATA_VALID2)
3353 0 : split_flag1 |= EXT4_EXT_DATA_VALID1;
3354 0 : err = ext4_split_extent_at(handle, inode, ppath,
3355 : map->m_lblk + map->m_len, split_flag1, flags1);
3356 0 : if (err)
3357 0 : goto out;
3358 : } else {
3359 0 : allocated = ee_len - (map->m_lblk - ee_block);
3360 : }
3361 : /*
3362 : 	 * An updated path is required because the previous ext4_split_extent_at() may
3363 : 	 * result in a split of the original leaf or an extent zeroout.
3364 : */
3365 0 : path = ext4_find_extent(inode, map->m_lblk, ppath, flags);
3366 0 : if (IS_ERR(path))
3367 0 : return PTR_ERR(path);
3368 0 : depth = ext_depth(inode);
3369 0 : ex = path[depth].p_ext;
3370 0 : if (!ex) {
3371 0 : EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3372 : (unsigned long) map->m_lblk);
3373 0 : return -EFSCORRUPTED;
3374 : }
3375 0 : unwritten = ext4_ext_is_unwritten(ex);
3376 :
3377 0 : if (map->m_lblk >= ee_block) {
3378 0 : split_flag1 = split_flag & EXT4_EXT_DATA_VALID2;
3379 0 : if (unwritten) {
3380 0 : split_flag1 |= EXT4_EXT_MARK_UNWRIT1;
3381 0 : split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
3382 : EXT4_EXT_MARK_UNWRIT2);
3383 : }
3384 0 : err = ext4_split_extent_at(handle, inode, ppath,
3385 : map->m_lblk, split_flag1, flags);
3386 0 : if (err)
3387 0 : goto out;
3388 : }
3389 :
3390 0 : ext4_ext_show_leaf(inode, path);
3391 0 : out:
3392 0 : return err ? err : allocated;
3393 : }
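/*
 * Worked example (editor's addition): for an unwritten extent covering
 * logical blocks [100, 200) and a map with m_lblk = 140 and m_len = 20, the
 * first pass above splits at block 160 (the PRE_IO pass) and the second at
 * block 140, leaving three extents: [100, 140), [140, 160) covering the
 * mapped range, and [160, 200).
 */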
3394 :
3395 : /*
3396 : * This function is called by ext4_ext_map_blocks() if someone tries to write
3397 : * to an unwritten extent. It may result in splitting the unwritten
3398 : * extent into multiple extents (up to three - one initialized and two
3399 : * unwritten).
3400 : * There are three possibilities:
3401 : * a> There is no split required: Entire extent should be initialized
3402 : * b> Splits into two extents: the write happens at either end of the extent
3403 : * c> Splits into three extents: someone is writing in the middle of the extent
3404 : *
3405 : * Pre-conditions:
3406 : * - The extent pointed to by 'path' is unwritten.
3407 : * - The extent pointed to by 'path' contains a superset
3408 : * of the logical span [map->m_lblk, map->m_lblk + map->m_len).
3409 : *
3410 : * Post-conditions on success:
3411 : * - the returned value is the number of blocks beyond map->l_lblk
3412 : * - the returned value is the number of blocks beyond map->m_lblk
3413 : * It is guaranteed to be >= map->m_len.
3414 : */
3415 0 : static int ext4_ext_convert_to_initialized(handle_t *handle,
3416 : struct inode *inode,
3417 : struct ext4_map_blocks *map,
3418 : struct ext4_ext_path **ppath,
3419 : int flags)
3420 : {
3421 0 : struct ext4_ext_path *path = *ppath;
3422 0 : struct ext4_sb_info *sbi;
3423 0 : struct ext4_extent_header *eh;
3424 0 : struct ext4_map_blocks split_map;
3425 0 : struct ext4_extent zero_ex1, zero_ex2;
3426 0 : struct ext4_extent *ex, *abut_ex;
3427 0 : ext4_lblk_t ee_block, eof_block;
3428 0 : unsigned int ee_len, depth, map_len = map->m_len;
3429 0 : int allocated = 0, max_zeroout = 0;
3430 0 : int err = 0;
3431 0 : int split_flag = EXT4_EXT_DATA_VALID2;
3432 :
3433 0 : ext_debug(inode, "logical block %llu, max_blocks %u\n",
3434 : (unsigned long long)map->m_lblk, map_len);
3435 :
3436 0 : sbi = EXT4_SB(inode->i_sb);
3437 0 : eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
3438 0 : >> inode->i_sb->s_blocksize_bits;
3439 0 : if (eof_block < map->m_lblk + map_len)
3440 : eof_block = map->m_lblk + map_len;
3441 :
3442 0 : depth = ext_depth(inode);
3443 0 : eh = path[depth].p_hdr;
3444 0 : ex = path[depth].p_ext;
3445 0 : ee_block = le32_to_cpu(ex->ee_block);
3446 0 : ee_len = ext4_ext_get_actual_len(ex);
3447 0 : zero_ex1.ee_len = 0;
3448 0 : zero_ex2.ee_len = 0;
3449 :
3450 0 : trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
3451 :
3452 : /* Pre-conditions */
3453 0 : BUG_ON(!ext4_ext_is_unwritten(ex));
3454 0 : BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));
3455 :
3456 : /*
3457 : * Attempt to transfer newly initialized blocks from the currently
3458 : * unwritten extent to its neighbor. This is much cheaper
3459 : * than an insertion followed by a merge as those involve costly
3460 : * memmove() calls. Transferring to the left is the common case in
3461 : * steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE)
3462 : * followed by append writes.
3463 : *
3464 : * Limitations of the current logic:
3465 : * - L1: we do not deal with writes covering the whole extent.
3466 : * This would require removing the extent if the transfer
3467 : * is possible.
3468 : * - L2: we only attempt to merge with an extent stored in the
3469 : * same extent tree node.
3470 : */
3471 0 : if ((map->m_lblk == ee_block) &&
3472 : /* See if we can merge left */
3473 0 : (map_len < ee_len) && /*L1*/
3474 0 : (ex > EXT_FIRST_EXTENT(eh))) { /*L2*/
3475 0 : ext4_lblk_t prev_lblk;
3476 0 : ext4_fsblk_t prev_pblk, ee_pblk;
3477 0 : unsigned int prev_len;
3478 :
3479 0 : abut_ex = ex - 1;
3480 0 : prev_lblk = le32_to_cpu(abut_ex->ee_block);
3481 0 : prev_len = ext4_ext_get_actual_len(abut_ex);
3482 0 : prev_pblk = ext4_ext_pblock(abut_ex);
3483 0 : ee_pblk = ext4_ext_pblock(ex);
3484 :
3485 : /*
3486 : * A transfer of blocks from 'ex' to 'abut_ex' is allowed
3487                 :                  * under the following conditions:
3488 : * - C1: abut_ex is initialized,
3489 : * - C2: abut_ex is logically abutting ex,
3490 : * - C3: abut_ex is physically abutting ex,
3491 : * - C4: abut_ex can receive the additional blocks without
3492 : * overflowing the (initialized) length limit.
3493 : */
3494 0 : if ((!ext4_ext_is_unwritten(abut_ex)) && /*C1*/
3495 0 : ((prev_lblk + prev_len) == ee_block) && /*C2*/
3496 0 : ((prev_pblk + prev_len) == ee_pblk) && /*C3*/
3497 0 : (prev_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/
3498 0 : err = ext4_ext_get_access(handle, inode, path + depth);
3499 0 : if (err)
3500 0 : goto out;
3501 :
3502 0 : trace_ext4_ext_convert_to_initialized_fastpath(inode,
3503 : map, ex, abut_ex);
3504 :
3505 : /* Shift the start of ex by 'map_len' blocks */
3506 0 : ex->ee_block = cpu_to_le32(ee_block + map_len);
3507 0 : ext4_ext_store_pblock(ex, ee_pblk + map_len);
3508 0 : ex->ee_len = cpu_to_le16(ee_len - map_len);
3509 0 : ext4_ext_mark_unwritten(ex); /* Restore the flag */
3510 :
3511 : /* Extend abut_ex by 'map_len' blocks */
3512 0 : abut_ex->ee_len = cpu_to_le16(prev_len + map_len);
3513 :
3514 : /* Result: number of initialized blocks past m_lblk */
3515 0 : allocated = map_len;
3516 : }
3517 0 : } else if (((map->m_lblk + map_len) == (ee_block + ee_len)) &&
3518 0 : (map_len < ee_len) && /*L1*/
3519 0 : ex < EXT_LAST_EXTENT(eh)) { /*L2*/
3520 : /* See if we can merge right */
3521 0 : ext4_lblk_t next_lblk;
3522 0 : ext4_fsblk_t next_pblk, ee_pblk;
3523 0 : unsigned int next_len;
3524 :
3525 0 : abut_ex = ex + 1;
3526 0 : next_lblk = le32_to_cpu(abut_ex->ee_block);
3527 0 : next_len = ext4_ext_get_actual_len(abut_ex);
3528 0 : next_pblk = ext4_ext_pblock(abut_ex);
3529 0 : ee_pblk = ext4_ext_pblock(ex);
3530 :
3531 : /*
3532 : * A transfer of blocks from 'ex' to 'abut_ex' is allowed
3533                 :                  * under the following conditions:
3534 : * - C1: abut_ex is initialized,
3535 : * - C2: abut_ex is logically abutting ex,
3536 : * - C3: abut_ex is physically abutting ex,
3537 : * - C4: abut_ex can receive the additional blocks without
3538 : * overflowing the (initialized) length limit.
3539 : */
3540 0 : if ((!ext4_ext_is_unwritten(abut_ex)) && /*C1*/
3541 0 : ((map->m_lblk + map_len) == next_lblk) && /*C2*/
3542 0 : ((ee_pblk + ee_len) == next_pblk) && /*C3*/
3543 0 : (next_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/
3544 0 : err = ext4_ext_get_access(handle, inode, path + depth);
3545 0 : if (err)
3546 0 : goto out;
3547 :
3548 0 : trace_ext4_ext_convert_to_initialized_fastpath(inode,
3549 : map, ex, abut_ex);
3550 :
3551 : /* Shift the start of abut_ex by 'map_len' blocks */
3552 0 : abut_ex->ee_block = cpu_to_le32(next_lblk - map_len);
3553 0 : ext4_ext_store_pblock(abut_ex, next_pblk - map_len);
3554 0 : ex->ee_len = cpu_to_le16(ee_len - map_len);
3555 0 : ext4_ext_mark_unwritten(ex); /* Restore the flag */
3556 :
3557 : /* Extend abut_ex by 'map_len' blocks */
3558 0 : abut_ex->ee_len = cpu_to_le16(next_len + map_len);
3559 :
3560 : /* Result: number of initialized blocks past m_lblk */
3561 0 : allocated = map_len;
3562 : }
3563 : }
3564 0 : if (allocated) {
3565 : /* Mark the block containing both extents as dirty */
3566 0 : err = ext4_ext_dirty(handle, inode, path + depth);
3567 :
3568 : /* Update path to point to the right extent */
3569 0 : path[depth].p_ext = abut_ex;
3570 0 : goto out;
3571 : } else
3572 0 : allocated = ee_len - (map->m_lblk - ee_block);
3573 :
3574 0 : WARN_ON(map->m_lblk < ee_block);
3575 : /*
3576 : * It is safe to convert extent to initialized via explicit
3577 : * zeroout only if extent is fully inside i_size or new_size.
3578 : */
3579 0 : split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
3580 :
3581 0 : if (EXT4_EXT_MAY_ZEROOUT & split_flag)
3582 0 : max_zeroout = sbi->s_extent_max_zeroout_kb >>
3583 0 : (inode->i_sb->s_blocksize_bits - 10);
3584 :
3585 : /*
3586 : * five cases:
3587 : * 1. split the extent into three extents.
3588 : * 2. split the extent into two extents, zeroout the head of the first
3589 : * extent.
3590 : * 3. split the extent into two extents, zeroout the tail of the second
3591 : * extent.
3592                 :          * 4. split the extent into two extents without zeroout.
3593 : * 5. no splitting needed, just possibly zeroout the head and / or the
3594 : * tail of the extent.
3595 : */
3596 0 : split_map.m_lblk = map->m_lblk;
3597 0 : split_map.m_len = map->m_len;
3598 :
3599 0 : if (max_zeroout && (allocated > split_map.m_len)) {
3600 0 : if (allocated <= max_zeroout) {
3601 : /* case 3 or 5 */
3602 0 : zero_ex1.ee_block =
3603 0 : cpu_to_le32(split_map.m_lblk +
3604 : split_map.m_len);
3605 0 : zero_ex1.ee_len =
3606 0 : cpu_to_le16(allocated - split_map.m_len);
3607 0 : ext4_ext_store_pblock(&zero_ex1,
3608 0 : ext4_ext_pblock(ex) + split_map.m_lblk +
3609 0 : split_map.m_len - ee_block);
3610 0 : err = ext4_ext_zeroout(inode, &zero_ex1);
3611 0 : if (err)
3612 0 : goto fallback;
3613 0 : split_map.m_len = allocated;
3614 : }
3615 0 : if (split_map.m_lblk - ee_block + split_map.m_len <
3616 : max_zeroout) {
3617 : /* case 2 or 5 */
3618 0 : if (split_map.m_lblk != ee_block) {
3619 0 : zero_ex2.ee_block = ex->ee_block;
3620 0 : zero_ex2.ee_len = cpu_to_le16(split_map.m_lblk -
3621 : ee_block);
3622 0 : ext4_ext_store_pblock(&zero_ex2,
3623 : ext4_ext_pblock(ex));
3624 0 : err = ext4_ext_zeroout(inode, &zero_ex2);
3625 0 : if (err)
3626 0 : goto fallback;
3627 : }
3628 :
3629 0 : split_map.m_len += split_map.m_lblk - ee_block;
3630 0 : split_map.m_lblk = ee_block;
3631 0 : allocated = map->m_len;
3632 : }
3633 : }
3634 :
3635 0 : fallback:
3636 0 : err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag,
3637 : flags);
3638 0 : if (err > 0)
3639 : err = 0;
3640 0 : out:
3641 : /* If we have gotten a failure, don't zero out status tree */
3642 0 : if (!err) {
3643 0 : ext4_zeroout_es(inode, &zero_ex1);
3644 0 : ext4_zeroout_es(inode, &zero_ex2);
3645 : }
3646 0 : return err ? err : allocated;
3647 : }
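/*
 * A minimal user-space sketch (not part of extents.c) of the workload this
 * conversion path serves: fallocate(FALLOC_FL_KEEP_SIZE) creates unwritten
 * extents, and a buffered write into the middle of that range is what ends
 * up in ext4_ext_convert_to_initialized(), splitting the unwritten extent
 * around the newly written blocks (case c> above). The file name and sizes
 * are arbitrary; it assumes the file lives on an ext4 filesystem.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	int fd = open("testfile", O_CREAT | O_RDWR, 0644);

	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}
	/* Preallocate 1 MiB of unwritten extents without changing i_size. */
	if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20) < 0) {
		perror("fallocate");
		return EXIT_FAILURE;
	}
	/* Write one block in the middle of the preallocated range. */
	memset(buf, 0xab, sizeof(buf));
	if (pwrite(fd, buf, sizeof(buf), 512 * 1024) < 0) {
		perror("pwrite");
		return EXIT_FAILURE;
	}
	fsync(fd);
	close(fd);
	return 0;
}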
3648 :
3649 : /*
3650 : * This function is called by ext4_ext_map_blocks() from
3651 : * ext4_get_blocks_dio_write() when DIO to write
3652                 :  * ext4_get_blocks_dio_write() when DIO needs to write
3653                 :  * to an unwritten extent.
3654 : * Writing to an unwritten extent may result in splitting the unwritten
3655 : * extent into multiple initialized/unwritten extents (up to three)
3656 : * There are three possibilities:
3657 : * a> There is no split required: Entire extent should be unwritten
3658 : * b> Splits in two extents: Write is happening at either end of the extent
3659                 :  *   c> Splits in three extents: Someone is writing in the middle of the extent
3660 : *
3661 : * This works the same way in the case of initialized -> unwritten conversion.
3662 : *
3663                 :  * One or more index blocks may be needed if the extent tree grows after
3664                 :  * the unwritten extent is split. To prevent ENOSPC from occurring when the
3665                 :  * IO completes, we need to split the unwritten extent before the DIO
3666                 :  * submits the IO. The unwritten extent found at this time will be split
3667                 :  * into (at most) three unwritten extents. After the IO completes, the part
3668                 :  * being filled will be converted to initialized by the end_io callback
3669                 :  * via ext4_convert_unwritten_extents().
3670 : *
3671 : * Returns the size of unwritten extent to be written on success.
3672 : */
3673 0 : static int ext4_split_convert_extents(handle_t *handle,
3674 : struct inode *inode,
3675 : struct ext4_map_blocks *map,
3676 : struct ext4_ext_path **ppath,
3677 : int flags)
3678 : {
3679 0 : struct ext4_ext_path *path = *ppath;
3680 0 : ext4_lblk_t eof_block;
3681 0 : ext4_lblk_t ee_block;
3682 0 : struct ext4_extent *ex;
3683 0 : unsigned int ee_len;
3684 0 : int split_flag = 0, depth;
3685 :
3686 0 : ext_debug(inode, "logical block %llu, max_blocks %u\n",
3687 : (unsigned long long)map->m_lblk, map->m_len);
3688 :
3689 0 : eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
3690 0 : >> inode->i_sb->s_blocksize_bits;
3691 0 : if (eof_block < map->m_lblk + map->m_len)
3692 : eof_block = map->m_lblk + map->m_len;
3693 : /*
3694 : * It is safe to convert extent to initialized via explicit
3695 : * zeroout only if extent is fully inside i_size or new_size.
3696 : */
3697 0 : depth = ext_depth(inode);
3698 0 : ex = path[depth].p_ext;
3699 0 : ee_block = le32_to_cpu(ex->ee_block);
3700 0 : ee_len = ext4_ext_get_actual_len(ex);
3701 :
3702 : /* Convert to unwritten */
3703 0 : if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) {
3704 : split_flag |= EXT4_EXT_DATA_VALID1;
3705 : /* Convert to initialized */
3706 0 : } else if (flags & EXT4_GET_BLOCKS_CONVERT) {
3707 0 : split_flag |= ee_block + ee_len <= eof_block ?
3708 0 : EXT4_EXT_MAY_ZEROOUT : 0;
3709 0 : split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2);
3710 : }
3711 0 : flags |= EXT4_GET_BLOCKS_PRE_IO;
3712 0 : return ext4_split_extent(handle, inode, ppath, map, split_flag, flags);
3713 : }
3714 :
3715 0 : static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3716 : struct inode *inode,
3717 : struct ext4_map_blocks *map,
3718 : struct ext4_ext_path **ppath)
3719 : {
3720 0 : struct ext4_ext_path *path = *ppath;
3721 0 : struct ext4_extent *ex;
3722 0 : ext4_lblk_t ee_block;
3723 0 : unsigned int ee_len;
3724 0 : int depth;
3725 0 : int err = 0;
3726 :
3727 0 : depth = ext_depth(inode);
3728 0 : ex = path[depth].p_ext;
3729 0 : ee_block = le32_to_cpu(ex->ee_block);
3730 0 : ee_len = ext4_ext_get_actual_len(ex);
3731 :
3732 0 : ext_debug(inode, "logical block %llu, max_blocks %u\n",
3733 : (unsigned long long)ee_block, ee_len);
3734 :
3735                 :         /* If the extent is larger than requested, it is a clear sign that we still
3736                 :          * have some extent state machine issues left, so an extent split is still
3737                 :          * required.
3738                 :          * TODO: Once all related issues are fixed, this situation should be
3739                 :          * illegal.
3740                 :          */
3740 : */
3741 0 : if (ee_block != map->m_lblk || ee_len > map->m_len) {
3742 : #ifdef CONFIG_EXT4_DEBUG
3743 0 : ext4_warning(inode->i_sb, "Inode (%ld) finished: extent logical block %llu,"
3744 : " len %u; IO logical block %llu, len %u",
3745 : inode->i_ino, (unsigned long long)ee_block, ee_len,
3746 : (unsigned long long)map->m_lblk, map->m_len);
3747 : #endif
3748 0 : err = ext4_split_convert_extents(handle, inode, map, ppath,
3749 : EXT4_GET_BLOCKS_CONVERT);
3750 0 : if (err < 0)
3751 : return err;
3752 0 : path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
3753 0 : if (IS_ERR(path))
3754 0 : return PTR_ERR(path);
3755 0 : depth = ext_depth(inode);
3756 0 : ex = path[depth].p_ext;
3757 : }
3758 :
3759 0 : err = ext4_ext_get_access(handle, inode, path + depth);
3760 0 : if (err)
3761 0 : goto out;
3762 : /* first mark the extent as initialized */
3763 0 : ext4_ext_mark_initialized(ex);
3764 :
3765 : /* note: ext4_ext_correct_indexes() isn't needed here because
3766 : * borders are not changed
3767 : */
3768 0 : ext4_ext_try_to_merge(handle, inode, path, ex);
3769 :
3770 : /* Mark modified extent as dirty */
3771 0 : err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3772 : out:
3773 : ext4_ext_show_leaf(inode, path);
3774 : return err;
3775 : }
3776 :
3777 : static int
3778 0 : convert_initialized_extent(handle_t *handle, struct inode *inode,
3779 : struct ext4_map_blocks *map,
3780 : struct ext4_ext_path **ppath,
3781 : unsigned int *allocated)
3782 : {
3783 0 : struct ext4_ext_path *path = *ppath;
3784 0 : struct ext4_extent *ex;
3785 0 : ext4_lblk_t ee_block;
3786 0 : unsigned int ee_len;
3787 0 : int depth;
3788 0 : int err = 0;
3789 :
3790 : /*
3791 : * Make sure that the extent is no bigger than we support with
3792                 :          * an unwritten extent
3793 : */
3794 0 : if (map->m_len > EXT_UNWRITTEN_MAX_LEN)
3795 0 : map->m_len = EXT_UNWRITTEN_MAX_LEN / 2;
3796 :
3797 0 : depth = ext_depth(inode);
3798 0 : ex = path[depth].p_ext;
3799 0 : ee_block = le32_to_cpu(ex->ee_block);
3800 0 : ee_len = ext4_ext_get_actual_len(ex);
3801 :
3802 0 : ext_debug(inode, "logical block %llu, max_blocks %u\n",
3803 : (unsigned long long)ee_block, ee_len);
3804 :
3805 0 : if (ee_block != map->m_lblk || ee_len > map->m_len) {
3806 0 : err = ext4_split_convert_extents(handle, inode, map, ppath,
3807 : EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
3808 0 : if (err < 0)
3809 : return err;
3810 0 : path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
3811 0 : if (IS_ERR(path))
3812 0 : return PTR_ERR(path);
3813 0 : depth = ext_depth(inode);
3814 0 : ex = path[depth].p_ext;
3815 0 : if (!ex) {
3816 0 : EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3817 : (unsigned long) map->m_lblk);
3818 0 : return -EFSCORRUPTED;
3819 : }
3820 : }
3821 :
3822 0 : err = ext4_ext_get_access(handle, inode, path + depth);
3823 0 : if (err)
3824 : return err;
3825 : /* first mark the extent as unwritten */
3826 0 : ext4_ext_mark_unwritten(ex);
3827 :
3828 : /* note: ext4_ext_correct_indexes() isn't needed here because
3829 : * borders are not changed
3830 : */
3831 0 : ext4_ext_try_to_merge(handle, inode, path, ex);
3832 :
3833 : /* Mark modified extent as dirty */
3834 0 : err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3835 0 : if (err)
3836 : return err;
3837 0 : ext4_ext_show_leaf(inode, path);
3838 :
3839 0 : ext4_update_inode_fsync_trans(handle, inode, 1);
3840 :
3841 0 : map->m_flags |= EXT4_MAP_UNWRITTEN;
3842 0 : if (*allocated > map->m_len)
3843 0 : *allocated = map->m_len;
3844 0 : map->m_len = *allocated;
3845 0 : return 0;
3846 : }
3847 :
3848 : static int
3849 0 : ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
3850 : struct ext4_map_blocks *map,
3851 : struct ext4_ext_path **ppath, int flags,
3852 : unsigned int allocated, ext4_fsblk_t newblock)
3853 : {
3854 0 : struct ext4_ext_path __maybe_unused *path = *ppath;
3855 0 : int ret = 0;
3856 0 : int err = 0;
3857 :
3858 0 : ext_debug(inode, "logical block %llu, max_blocks %u, flags 0x%x, allocated %u\n",
3859 : (unsigned long long)map->m_lblk, map->m_len, flags,
3860 : allocated);
3861 0 : ext4_ext_show_leaf(inode, path);
3862 :
3863 : /*
3864 : * When writing into unwritten space, we should not fail to
3865 : * allocate metadata blocks for the new extent block if needed.
3866 : */
3867 0 : flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL;
3868 :
3869 0 : trace_ext4_ext_handle_unwritten_extents(inode, map, flags,
3870 : allocated, newblock);
3871 :
3872 : /* get_block() before submitting IO, split the extent */
3873 0 : if (flags & EXT4_GET_BLOCKS_PRE_IO) {
3874 0 : ret = ext4_split_convert_extents(handle, inode, map, ppath,
3875 : flags | EXT4_GET_BLOCKS_CONVERT);
3876 0 : if (ret < 0) {
3877 0 : err = ret;
3878 0 : goto out2;
3879 : }
3880 : /*
3881 : * shouldn't get a 0 return when splitting an extent unless
3882 : * m_len is 0 (bug) or extent has been corrupted
3883 : */
3884 0 : if (unlikely(ret == 0)) {
3885 0 : EXT4_ERROR_INODE(inode,
3886 : "unexpected ret == 0, m_len = %u",
3887 : map->m_len);
3888 0 : err = -EFSCORRUPTED;
3889 0 : goto out2;
3890 : }
3891 0 : map->m_flags |= EXT4_MAP_UNWRITTEN;
3892 0 : goto out;
3893 : }
3894 : /* IO end_io complete, convert the filled extent to written */
3895 0 : if (flags & EXT4_GET_BLOCKS_CONVERT) {
3896 0 : err = ext4_convert_unwritten_extents_endio(handle, inode, map,
3897 : ppath);
3898 0 : if (err < 0)
3899 0 : goto out2;
3900 0 : ext4_update_inode_fsync_trans(handle, inode, 1);
3901 0 : goto map_out;
3902 : }
3903 : /* buffered IO cases */
3904 : /*
3905 : * repeat fallocate creation request
3906 : * we already have an unwritten extent
3907 : */
3908 0 : if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {
3909 0 : map->m_flags |= EXT4_MAP_UNWRITTEN;
3910 0 : goto map_out;
3911 : }
3912 :
3913 : /* buffered READ or buffered write_begin() lookup */
3914 0 : if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
3915 : /*
3916 : * We have blocks reserved already. We
3917 : * return allocated blocks so that delalloc
3918 : * won't do block reservation for us. But
3919 : * the buffer head will be unmapped so that
3920 : * a read from the block returns 0s.
3921 : */
3922 0 : map->m_flags |= EXT4_MAP_UNWRITTEN;
3923 0 : goto out1;
3924 : }
3925 :
3926 : /*
3927 : * Default case when (flags & EXT4_GET_BLOCKS_CREATE) == 1.
3928 : * For buffered writes, at writepage time, etc. Convert a
3929 : * discovered unwritten extent to written.
3930 : */
3931 0 : ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags);
3932 0 : if (ret < 0) {
3933 0 : err = ret;
3934 0 : goto out2;
3935 : }
3936 0 : ext4_update_inode_fsync_trans(handle, inode, 1);
3937 : /*
3938 : * shouldn't get a 0 return when converting an unwritten extent
3939 : * unless m_len is 0 (bug) or extent has been corrupted
3940 : */
3941 0 : if (unlikely(ret == 0)) {
3942 0 : EXT4_ERROR_INODE(inode, "unexpected ret == 0, m_len = %u",
3943 : map->m_len);
3944 0 : err = -EFSCORRUPTED;
3945 0 : goto out2;
3946 : }
3947 :
3948 0 : out:
3949 0 : allocated = ret;
3950 0 : map->m_flags |= EXT4_MAP_NEW;
3951 0 : map_out:
3952 0 : map->m_flags |= EXT4_MAP_MAPPED;
3953 0 : out1:
3954 0 : map->m_pblk = newblock;
3955 0 : if (allocated > map->m_len)
3956 : allocated = map->m_len;
3957 0 : map->m_len = allocated;
3958 0 : ext4_ext_show_leaf(inode, path);
3959 0 : out2:
3960 0 : return err ? err : allocated;
3961 : }
3962 :
3963 : /*
3964 : * get_implied_cluster_alloc - check to see if the requested
3965 : * allocation (in the map structure) overlaps with a cluster already
3966 : * allocated in an extent.
3967 : * @sb The filesystem superblock structure
3968 : * @map The requested lblk->pblk mapping
3969 : * @ex The extent structure which might contain an implied
3970 : * cluster allocation
3971 : *
3972 : * This function is called by ext4_ext_map_blocks() after we failed to
3973 : * find blocks that were already in the inode's extent tree. Hence,
3974 : * we know that the beginning of the requested region cannot overlap
3975 : * the extent from the inode's extent tree. There are three cases we
3976 : * want to catch. The first is this case:
3977 : *
3978 : * |--- cluster # N--|
3979 : * |--- extent ---| |---- requested region ---|
3980 : * |==========|
3981 : *
3982 : * The second case that we need to test for is this one:
3983 : *
3984 : * |--------- cluster # N ----------------|
3985 : * |--- requested region --| |------- extent ----|
3986 : * |=======================|
3987 : *
3988 : * The third case is when the requested region lies between two extents
3989 : * within the same cluster:
3990 : * |------------- cluster # N-------------|
3991 : * |----- ex -----| |---- ex_right ----|
3992 : * |------ requested region ------|
3993 : * |================|
3994 : *
3995 : * In each of the above cases, we need to set the map->m_pblk and
3996                 :  * map->m_len so that they correspond to the extent labelled as
3997 : * "|====|" from cluster #N, since it is already in use for data in
3998 : * cluster EXT4_B2C(sbi, map->m_lblk). We will then return 1 to
3999 : * signal to ext4_ext_map_blocks() that map->m_pblk should be treated
4000 : * as a new "allocated" block region. Otherwise, we will return 0 and
4001 : * ext4_ext_map_blocks() will then allocate one or more new clusters
4002 : * by calling ext4_mb_new_blocks().
4003 : */
4004 0 : static int get_implied_cluster_alloc(struct super_block *sb,
4005 : struct ext4_map_blocks *map,
4006 : struct ext4_extent *ex,
4007 : struct ext4_ext_path *path)
4008 : {
4009 0 : struct ext4_sb_info *sbi = EXT4_SB(sb);
4010 0 : ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
4011 0 : ext4_lblk_t ex_cluster_start, ex_cluster_end;
4012 0 : ext4_lblk_t rr_cluster_start;
4013 0 : ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
4014 0 : ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
4015 0 : unsigned short ee_len = ext4_ext_get_actual_len(ex);
4016 :
4017 : /* The extent passed in that we are trying to match */
4018 0 : ex_cluster_start = EXT4_B2C(sbi, ee_block);
4019 0 : ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1);
4020 :
4021 : /* The requested region passed into ext4_map_blocks() */
4022 0 : rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
4023 :
4024 0 : if ((rr_cluster_start == ex_cluster_end) ||
4025 0 : (rr_cluster_start == ex_cluster_start)) {
4026 0 : if (rr_cluster_start == ex_cluster_end)
4027 0 : ee_start += ee_len - 1;
4028 0 : map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset;
4029 0 : map->m_len = min(map->m_len,
4030 : (unsigned) sbi->s_cluster_ratio - c_offset);
4031 : /*
4032 : * Check for and handle this case:
4033 : *
4034 : * |--------- cluster # N-------------|
4035 : * |------- extent ----|
4036 : * |--- requested region ---|
4037 : * |===========|
4038 : */
4039 :
4040 0 : if (map->m_lblk < ee_block)
4041 0 : map->m_len = min(map->m_len, ee_block - map->m_lblk);
4042 :
4043 : /*
4044 : * Check for the case where there is already another allocated
4045 : * block to the right of 'ex' but before the end of the cluster.
4046 : *
4047 : * |------------- cluster # N-------------|
4048 : * |----- ex -----| |---- ex_right ----|
4049 : * |------ requested region ------|
4050 : * |================|
4051 : */
4052 0 : if (map->m_lblk > ee_block) {
4053 0 : ext4_lblk_t next = ext4_ext_next_allocated_block(path);
4054 0 : map->m_len = min(map->m_len, next - map->m_lblk);
4055 : }
4056 :
4057 0 : trace_ext4_get_implied_cluster_alloc_exit(sb, map, 1);
4058 0 : return 1;
4059 : }
4060 :
4061 0 : trace_ext4_get_implied_cluster_alloc_exit(sb, map, 0);
4062 0 : return 0;
4063 : }
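/*
 * An illustrative sketch (not part of extents.c) of the bigalloc cluster
 * arithmetic used above, written out with plain shifts and masks. The
 * geometry of 16 blocks per cluster and the block numbers are made-up
 * examples; b2c() and lblk_coff() are stand-ins for EXT4_B2C() and
 * EXT4_LBLK_COFF().
 */
#include <stdio.h>

#define CLUSTER_BITS	4			/* 16 blocks per cluster */
#define CLUSTER_RATIO	(1u << CLUSTER_BITS)

static unsigned int b2c(unsigned int blk)	/* block -> cluster number */
{
	return blk >> CLUSTER_BITS;
}

static unsigned int lblk_coff(unsigned int blk)	/* offset within its cluster */
{
	return blk & (CLUSTER_RATIO - 1);
}

int main(void)
{
	unsigned int ee_block = 30, ee_len = 4;	/* extent covers blocks 30..33 */
	unsigned int lblk = 35;			/* requested logical block */

	/* Cluster 1 spans blocks 16..31 and cluster 2 spans blocks 32..47. */
	printf("extent clusters %u..%u, request cluster %u, offset %u\n",
	       b2c(ee_block), b2c(ee_block + ee_len - 1),
	       b2c(lblk), lblk_coff(lblk));
	/*
	 * The request falls in the same cluster (2) as the extent's tail, so
	 * get_implied_cluster_alloc() would report an implied allocation
	 * instead of asking mballoc for a new cluster.
	 */
	return 0;
}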
4064 :
4065 :
4066 : /*
4067 : * Block allocation/map/preallocation routine for extents based files
4068 : *
4069 : *
4070                 :  * Needs to be called with
4071                 :  * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system blocks
4072                 :  * (i.e., create is zero); otherwise down_write(&EXT4_I(inode)->i_data_sem)
4073 : *
4074 : * return > 0, number of blocks already mapped/allocated
4075 : * if create == 0 and these are pre-allocated blocks
4076 : * buffer head is unmapped
4077 : * otherwise blocks are mapped
4078 : *
4079 : * return = 0, if plain look up failed (blocks have not been allocated)
4080 : * buffer head is unmapped
4081 : *
4082 : * return < 0, error case.
4083 : */
4084 0 : int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4085 : struct ext4_map_blocks *map, int flags)
4086 : {
4087 0 : struct ext4_ext_path *path = NULL;
4088 0 : struct ext4_extent newex, *ex, ex2;
4089 0 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
4090 0 : ext4_fsblk_t newblock = 0, pblk;
4091 0 : int err = 0, depth, ret;
4092 0 : unsigned int allocated = 0, offset = 0;
4093 0 : unsigned int allocated_clusters = 0;
4094 0 : struct ext4_allocation_request ar;
4095 0 : ext4_lblk_t cluster_offset;
4096 :
4097 0 : ext_debug(inode, "blocks %u/%u requested\n", map->m_lblk, map->m_len);
4098 0 : trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
4099 :
4100 : /* find extent for this block */
4101 0 : path = ext4_find_extent(inode, map->m_lblk, NULL, 0);
4102 0 : if (IS_ERR(path)) {
4103 0 : err = PTR_ERR(path);
4104 0 : path = NULL;
4105 0 : goto out;
4106 : }
4107 :
4108 0 : depth = ext_depth(inode);
4109 :
4110 : /*
4111 : * consistent leaf must not be empty;
4112 : * this situation is possible, though, _during_ tree modification;
4113 : * this is why assert can't be put in ext4_find_extent()
4114 : */
4115 0 : if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
4116 0 : EXT4_ERROR_INODE(inode, "bad extent address "
4117 : "lblock: %lu, depth: %d pblock %lld",
4118 : (unsigned long) map->m_lblk, depth,
4119 : path[depth].p_block);
4120 0 : err = -EFSCORRUPTED;
4121 0 : goto out;
4122 : }
4123 :
4124 0 : ex = path[depth].p_ext;
4125 0 : if (ex) {
4126 0 : ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
4127 0 : ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
4128 0 : unsigned short ee_len;
4129 :
4130 :
4131 : /*
4132 : * unwritten extents are treated as holes, except that
4133 : * we split out initialized portions during a write.
4134 : */
4135 0 : ee_len = ext4_ext_get_actual_len(ex);
4136 :
4137 0 : trace_ext4_ext_show_extent(inode, ee_block, ee_start, ee_len);
4138 :
4139 : /* if found extent covers block, simply return it */
4140 0 : if (in_range(map->m_lblk, ee_block, ee_len)) {
4141 0 : newblock = map->m_lblk - ee_block + ee_start;
4142 : /* number of remaining blocks in the extent */
4143 0 : allocated = ee_len - (map->m_lblk - ee_block);
4144 0 : ext_debug(inode, "%u fit into %u:%d -> %llu\n",
4145 : map->m_lblk, ee_block, ee_len, newblock);
4146 :
4147 : /*
4148 : * If the extent is initialized check whether the
4149 : * caller wants to convert it to unwritten.
4150 : */
4151 0 : if ((!ext4_ext_is_unwritten(ex)) &&
4152 0 : (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
4153 0 : err = convert_initialized_extent(handle,
4154 : inode, map, &path, &allocated);
4155 0 : goto out;
4156 0 : } else if (!ext4_ext_is_unwritten(ex)) {
4157 0 : map->m_flags |= EXT4_MAP_MAPPED;
4158 0 : map->m_pblk = newblock;
4159 0 : if (allocated > map->m_len)
4160 0 : allocated = map->m_len;
4161 0 : map->m_len = allocated;
4162 0 : ext4_ext_show_leaf(inode, path);
4163 0 : goto out;
4164 : }
4165 :
4166 0 : ret = ext4_ext_handle_unwritten_extents(
4167 : handle, inode, map, &path, flags,
4168 : allocated, newblock);
4169 0 : if (ret < 0)
4170 0 : err = ret;
4171 : else
4172 0 : allocated = ret;
4173 0 : goto out;
4174 : }
4175 : }
4176 :
4177 : /*
4178 : * requested block isn't allocated yet;
4179                 :          * we cannot try to create blocks if the create flag is zero
4180 : */
4181 0 : if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
4182 0 : ext4_lblk_t hole_start, hole_len;
4183 :
4184 0 : hole_start = map->m_lblk;
4185 0 : hole_len = ext4_ext_determine_hole(inode, path, &hole_start);
4186 : /*
4187 : * put just found gap into cache to speed up
4188 : * subsequent requests
4189 : */
4190 0 : ext4_ext_put_gap_in_cache(inode, hole_start, hole_len);
4191 :
4192 : /* Update hole_len to reflect hole size after map->m_lblk */
4193 0 : if (hole_start != map->m_lblk)
4194 0 : hole_len -= map->m_lblk - hole_start;
4195 0 : map->m_pblk = 0;
4196 0 : map->m_len = min_t(unsigned int, map->m_len, hole_len);
4197 :
4198 0 : goto out;
4199 : }
4200 :
4201 : /*
4202 : * Okay, we need to do block allocation.
4203 : */
4204 0 : newex.ee_block = cpu_to_le32(map->m_lblk);
4205 0 : cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
4206 :
4207 : /*
4208 : * If we are doing bigalloc, check to see if the extent returned
4209 : * by ext4_find_extent() implies a cluster we can use.
4210 : */
4211 0 : if (cluster_offset && ex &&
4212 0 : get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
4213 0 : ar.len = allocated = map->m_len;
4214 0 : newblock = map->m_pblk;
4215 0 : goto got_allocated_blocks;
4216 : }
4217 :
4218 : /* find neighbour allocated blocks */
4219 0 : ar.lleft = map->m_lblk;
4220 0 : err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
4221 0 : if (err)
4222 0 : goto out;
4223 0 : ar.lright = map->m_lblk;
4224 0 : err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2);
4225 0 : if (err < 0)
4226 0 : goto out;
4227 :
4228 : /* Check if the extent after searching to the right implies a
4229 : * cluster we can use. */
4230 0 : if ((sbi->s_cluster_ratio > 1) && err &&
4231 0 : get_implied_cluster_alloc(inode->i_sb, map, &ex2, path)) {
4232 0 : ar.len = allocated = map->m_len;
4233 0 : newblock = map->m_pblk;
4234 0 : goto got_allocated_blocks;
4235 : }
4236 :
4237 : /*
4238 : * See if request is beyond maximum number of blocks we can have in
4239 : * a single extent. For an initialized extent this limit is
4240 : * EXT_INIT_MAX_LEN and for an unwritten extent this limit is
4241 : * EXT_UNWRITTEN_MAX_LEN.
4242 : */
4243 0 : if (map->m_len > EXT_INIT_MAX_LEN &&
4244 0 : !(flags & EXT4_GET_BLOCKS_UNWRIT_EXT))
4245 0 : map->m_len = EXT_INIT_MAX_LEN;
4246 0 : else if (map->m_len > EXT_UNWRITTEN_MAX_LEN &&
4247 0 : (flags & EXT4_GET_BLOCKS_UNWRIT_EXT))
4248 0 : map->m_len = EXT_UNWRITTEN_MAX_LEN;
4249 :
4250 : /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
4251 0 : newex.ee_len = cpu_to_le16(map->m_len);
4252 0 : err = ext4_ext_check_overlap(sbi, inode, &newex, path);
4253 0 : if (err)
4254 0 : allocated = ext4_ext_get_actual_len(&newex);
4255 : else
4256 0 : allocated = map->m_len;
4257 :
4258 : /* allocate new block */
4259 0 : ar.inode = inode;
4260 0 : ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
4261 0 : ar.logical = map->m_lblk;
4262 : /*
4263 : * We calculate the offset from the beginning of the cluster
4264 : * for the logical block number, since when we allocate a
4265 : * physical cluster, the physical block should start at the
4266 : * same offset from the beginning of the cluster. This is
4267 : * needed so that future calls to get_implied_cluster_alloc()
4268 : * work correctly.
4269 : */
4270 0 : offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
4271 0 : ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
4272 0 : ar.goal -= offset;
4273 0 : ar.logical -= offset;
4274 0 : if (S_ISREG(inode->i_mode))
4275 0 : ar.flags = EXT4_MB_HINT_DATA;
4276 : else
4277 : /* disable in-core preallocation for non-regular files */
4278 0 : ar.flags = 0;
4279 0 : if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
4280 0 : ar.flags |= EXT4_MB_HINT_NOPREALLOC;
4281 0 : if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
4282 0 : ar.flags |= EXT4_MB_DELALLOC_RESERVED;
4283 0 : if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
4284 0 : ar.flags |= EXT4_MB_USE_RESERVED;
4285 0 : newblock = ext4_mb_new_blocks(handle, &ar, &err);
4286 0 : if (!newblock)
4287 0 : goto out;
4288 0 : allocated_clusters = ar.len;
4289 0 : ar.len = EXT4_C2B(sbi, ar.len) - offset;
4290 0 : ext_debug(inode, "allocate new block: goal %llu, found %llu/%u, requested %u\n",
4291 : ar.goal, newblock, ar.len, allocated);
4292 0 : if (ar.len > allocated)
4293 0 : ar.len = allocated;
4294 :
4295 0 : got_allocated_blocks:
4296 : /* try to insert new extent into found leaf and return */
4297 0 : pblk = newblock + offset;
4298 0 : ext4_ext_store_pblock(&newex, pblk);
4299 0 : newex.ee_len = cpu_to_le16(ar.len);
4300 : /* Mark unwritten */
4301 0 : if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {
4302 0 : ext4_ext_mark_unwritten(&newex);
4303 0 : map->m_flags |= EXT4_MAP_UNWRITTEN;
4304 : }
4305 :
4306 0 : err = ext4_ext_insert_extent(handle, inode, &path, &newex, flags);
4307 0 : if (err) {
4308 0 : if (allocated_clusters) {
4309 0 : int fb_flags = 0;
4310 :
4311 : /*
4312 : * free data blocks we just allocated.
4313 : * not a good idea to call discard here directly,
4314                 :                          * but otherwise we'd need to call it on every free().
4315 : */
4316 0 : ext4_discard_preallocations(inode, 0);
4317 0 : if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
4318 0 : fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE;
4319 0 : ext4_free_blocks(handle, inode, NULL, newblock,
4320 0 : EXT4_C2B(sbi, allocated_clusters),
4321 : fb_flags);
4322 : }
4323 0 : goto out;
4324 : }
4325 :
4326 : /*
4327 : * Reduce the reserved cluster count to reflect successful deferred
4328 : * allocation of delayed allocated clusters or direct allocation of
4329 : * clusters discovered to be delayed allocated. Once allocated, a
4330 : * cluster is not included in the reserved count.
4331 : */
4332 0 : if (test_opt(inode->i_sb, DELALLOC) && allocated_clusters) {
4333 0 : if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
4334 : /*
4335 : * When allocating delayed allocated clusters, simply
4336 : * reduce the reserved cluster count and claim quota
4337 : */
4338 0 : ext4_da_update_reserve_space(inode, allocated_clusters,
4339 : 1);
4340 : } else {
4341 0 : ext4_lblk_t lblk, len;
4342 0 : unsigned int n;
4343 :
4344 : /*
4345 : * When allocating non-delayed allocated clusters
4346 : * (from fallocate, filemap, DIO, or clusters
4347 : * allocated when delalloc has been disabled by
4348 : * ext4_nonda_switch), reduce the reserved cluster
4349 : * count by the number of allocated clusters that
4350 : * have previously been delayed allocated. Quota
4351 : * has been claimed by ext4_mb_new_blocks() above,
4352 : * so release the quota reservations made for any
4353 : * previously delayed allocated clusters.
4354 : */
4355 0 : lblk = EXT4_LBLK_CMASK(sbi, map->m_lblk);
4356 0 : len = allocated_clusters << sbi->s_cluster_bits;
4357 0 : n = ext4_es_delayed_clu(inode, lblk, len);
4358 0 : if (n > 0)
4359 0 : ext4_da_update_reserve_space(inode, (int) n, 0);
4360 : }
4361 : }
4362 :
4363 : /*
4364 : * Cache the extent and update transaction to commit on fdatasync only
4365 : * when it is _not_ an unwritten extent.
4366 : */
4367 0 : if ((flags & EXT4_GET_BLOCKS_UNWRIT_EXT) == 0)
4368 0 : ext4_update_inode_fsync_trans(handle, inode, 1);
4369 : else
4370 0 : ext4_update_inode_fsync_trans(handle, inode, 0);
4371 :
4372 0 : map->m_flags |= (EXT4_MAP_NEW | EXT4_MAP_MAPPED);
4373 0 : map->m_pblk = pblk;
4374 0 : map->m_len = ar.len;
4375 0 : allocated = map->m_len;
4376 0 : ext4_ext_show_leaf(inode, path);
4377 0 : out:
4378 0 : ext4_free_ext_path(path);
4379 :
4380 0 : trace_ext4_ext_map_blocks_exit(inode, flags, map,
4381 0 : err ? err : allocated);
4382 0 : return err ? err : allocated;
4383 : }
4384 :
4385 0 : int ext4_ext_truncate(handle_t *handle, struct inode *inode)
4386 : {
4387 0 : struct super_block *sb = inode->i_sb;
4388 0 : ext4_lblk_t last_block;
4389 0 : int err = 0;
4390 :
4391 : /*
4392 : * TODO: optimization is possible here.
4393 : * Probably we need not scan at all,
4394 : * because page truncation is enough.
4395 : */
4396 :
4397 : /* we have to know where to truncate from in crash case */
4398 0 : EXT4_I(inode)->i_disksize = inode->i_size;
4399 0 : err = ext4_mark_inode_dirty(handle, inode);
4400 0 : if (err)
4401 : return err;
4402 :
4403 0 : last_block = (inode->i_size + sb->s_blocksize - 1)
4404 0 : >> EXT4_BLOCK_SIZE_BITS(sb);
4405 0 : ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block);
4406 :
4407 0 : retry_remove_space:
4408 0 : err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
4409 0 : if (err == -ENOMEM) {
4410 0 : memalloc_retry_wait(GFP_ATOMIC);
4411 0 : goto retry_remove_space;
4412 : }
4413 : return err;
4414 : }
4415 :
4416 0 : static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
4417 : ext4_lblk_t len, loff_t new_size,
4418 : int flags)
4419 : {
4420 0 : struct inode *inode = file_inode(file);
4421 0 : handle_t *handle;
4422 0 : int ret = 0, ret2 = 0, ret3 = 0;
4423 0 : int retries = 0;
4424 0 : int depth = 0;
4425 0 : struct ext4_map_blocks map;
4426 0 : unsigned int credits;
4427 0 : loff_t epos;
4428 :
4429 0 : BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
4430 0 : map.m_lblk = offset;
4431 0 : map.m_len = len;
4432 : /*
4433 : * Don't normalize the request if it can fit in one extent so
4434 : * that it doesn't get unnecessarily split into multiple
4435 : * extents.
4436 : */
4437 0 : if (len <= EXT_UNWRITTEN_MAX_LEN)
4438 0 : flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
4439 :
4440 : /*
4441 : * credits to insert 1 extent into extent tree
4442 : */
4443 0 : credits = ext4_chunk_trans_blocks(inode, len);
4444 0 : depth = ext_depth(inode);
4445 :
4446 : retry:
4447 0 : while (len) {
4448 : /*
4449 : * Recalculate credits when extent tree depth changes.
4450 : */
4451 0 : if (depth != ext_depth(inode)) {
4452 0 : credits = ext4_chunk_trans_blocks(inode, len);
4453 0 : depth = ext_depth(inode);
4454 : }
4455 :
4456 0 : handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4457 : credits);
4458 0 : if (IS_ERR(handle)) {
4459 0 : ret = PTR_ERR(handle);
4460 0 : break;
4461 : }
4462 0 : ret = ext4_map_blocks(handle, inode, &map, flags);
4463 0 : if (ret <= 0) {
4464 0 : ext4_debug("inode #%lu: block %u: len %u: "
4465 : "ext4_ext_map_blocks returned %d",
4466 : inode->i_ino, map.m_lblk,
4467 : map.m_len, ret);
4468 0 : ext4_mark_inode_dirty(handle, inode);
4469 0 : ext4_journal_stop(handle);
4470 0 : break;
4471 : }
4472 : /*
4473 : * allow a full retry cycle for any remaining allocations
4474 : */
4475 0 : retries = 0;
4476 0 : map.m_lblk += ret;
4477 0 : map.m_len = len = len - ret;
4478 0 : epos = (loff_t)map.m_lblk << inode->i_blkbits;
4479 0 : inode->i_ctime = current_time(inode);
4480 0 : if (new_size) {
4481 0 : if (epos > new_size)
4482 : epos = new_size;
4483 0 : if (ext4_update_inode_size(inode, epos) & 0x1)
4484 0 : inode->i_mtime = inode->i_ctime;
4485 : }
4486 0 : ret2 = ext4_mark_inode_dirty(handle, inode);
4487 0 : ext4_update_inode_fsync_trans(handle, inode, 1);
4488 0 : ret3 = ext4_journal_stop(handle);
4489 0 : ret2 = ret3 ? ret3 : ret2;
4490 0 : if (unlikely(ret2))
4491 : break;
4492 : }
4493 0 : if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
4494 0 : goto retry;
4495 :
4496 0 : return ret > 0 ? ret2 : ret;
4497 : }
4498 :
4499 : static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len);
4500 :
4501 : static int ext4_insert_range(struct file *file, loff_t offset, loff_t len);
4502 :
4503 0 : static long ext4_zero_range(struct file *file, loff_t offset,
4504 : loff_t len, int mode)
4505 : {
4506 0 : struct inode *inode = file_inode(file);
4507 0 : struct address_space *mapping = file->f_mapping;
4508 0 : handle_t *handle = NULL;
4509 0 : unsigned int max_blocks;
4510 0 : loff_t new_size = 0;
4511 0 : int ret = 0;
4512 0 : int flags;
4513 0 : int credits;
4514 0 : int partial_begin, partial_end;
4515 0 : loff_t start, end;
4516 0 : ext4_lblk_t lblk;
4517 0 : unsigned int blkbits = inode->i_blkbits;
4518 :
4519 0 : trace_ext4_zero_range(inode, offset, len, mode);
4520 :
4521 : /*
4522 : * Round up offset. This is not fallocate, we need to zero out
4523                 :          * blocks, so convert the interior block-aligned part of the range to
4524                 :          * unwritten and possibly manually zero out the unaligned parts of the
4525                 :          * range.
4526 : */
4527 0 : start = round_up(offset, 1 << blkbits);
4528 0 : end = round_down((offset + len), 1 << blkbits);
4529 :
4530 0 : if (start < offset || end > offset + len)
4531 : return -EINVAL;
4532 0 : partial_begin = offset & ((1 << blkbits) - 1);
4533 0 : partial_end = (offset + len) & ((1 << blkbits) - 1);
4534 :
4535 0 : lblk = start >> blkbits;
4536 0 : max_blocks = (end >> blkbits);
4537 0 : if (max_blocks < lblk)
4538 : max_blocks = 0;
4539 : else
4540 0 : max_blocks -= lblk;
4541 :
4542 0 : inode_lock(inode);
4543 :
4544 : /*
4545 : * Indirect files do not support unwritten extents
4546 : */
4547 0 : if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4548 0 : ret = -EOPNOTSUPP;
4549 0 : goto out_mutex;
4550 : }
4551 :
4552 0 : if (!(mode & FALLOC_FL_KEEP_SIZE) &&
4553 0 : (offset + len > inode->i_size ||
4554 0 : offset + len > EXT4_I(inode)->i_disksize)) {
4555 0 : new_size = offset + len;
4556 0 : ret = inode_newsize_ok(inode, new_size);
4557 0 : if (ret)
4558 0 : goto out_mutex;
4559 : }
4560 :
4561 0 : flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
4562 :
4563                 :         /* Wait for all existing dio workers; newcomers will block on i_rwsem */
4564 0 : inode_dio_wait(inode);
4565 :
4566 0 : ret = file_modified(file);
4567 0 : if (ret)
4568 0 : goto out_mutex;
4569 :
4570 : /* Preallocate the range including the unaligned edges */
4571 0 : if (partial_begin || partial_end) {
4572 0 : ret = ext4_alloc_file_blocks(file,
4573 0 : round_down(offset, 1 << blkbits) >> blkbits,
4574 0 : (round_up((offset + len), 1 << blkbits) -
4575 0 : round_down(offset, 1 << blkbits)) >> blkbits,
4576 : new_size, flags);
4577 0 : if (ret)
4578 0 : goto out_mutex;
4579 :
4580 : }
4581 :
4582 : /* Zero range excluding the unaligned edges */
4583 0 : if (max_blocks > 0) {
4584 0 : flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
4585 : EXT4_EX_NOCACHE);
4586 :
4587 : /*
4588 : * Prevent page faults from reinstantiating pages we have
4589 : * released from page cache.
4590 : */
4591 0 : filemap_invalidate_lock(mapping);
4592 :
4593 0 : ret = ext4_break_layouts(inode);
4594 0 : if (ret) {
4595 0 : filemap_invalidate_unlock(mapping);
4596 0 : goto out_mutex;
4597 : }
4598 :
4599 0 : ret = ext4_update_disksize_before_punch(inode, offset, len);
4600 0 : if (ret) {
4601 0 : filemap_invalidate_unlock(mapping);
4602 0 : goto out_mutex;
4603 : }
4604 :
4605 : /*
4606 : * For journalled data we need to write (and checkpoint) pages
4607                 :                  * before discarding page cache to avoid inconsistent data on
4608                 :                  * disk in case of a crash before the zeroing transaction is committed.
4609 : */
4610 0 : if (ext4_should_journal_data(inode)) {
4611 0 : ret = filemap_write_and_wait_range(mapping, start, end);
4612 0 : if (ret) {
4613 0 : filemap_invalidate_unlock(mapping);
4614 0 : goto out_mutex;
4615 : }
4616 : }
4617 :
4618 : /* Now release the pages and zero block aligned part of pages */
4619 0 : truncate_pagecache_range(inode, start, end - 1);
4620 0 : inode->i_mtime = inode->i_ctime = current_time(inode);
4621 :
4622 0 : ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
4623 : flags);
4624 0 : filemap_invalidate_unlock(mapping);
4625 0 : if (ret)
4626 0 : goto out_mutex;
4627 : }
4628 0 : if (!partial_begin && !partial_end)
4629 0 : goto out_mutex;
4630 :
4631 : /*
4632                 :          * In the worst case we have to write out two nonadjacent unwritten
4633 : * blocks and update the inode
4634 : */
4635 0 : credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1;
4636 0 : if (ext4_should_journal_data(inode))
4637 0 : credits += 2;
4638 0 : handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
4639 0 : if (IS_ERR(handle)) {
4640 0 : ret = PTR_ERR(handle);
4641 0 : ext4_std_error(inode->i_sb, ret);
4642 0 : goto out_mutex;
4643 : }
4644 :
4645 0 : inode->i_mtime = inode->i_ctime = current_time(inode);
4646 0 : if (new_size)
4647 0 : ext4_update_inode_size(inode, new_size);
4648 0 : ret = ext4_mark_inode_dirty(handle, inode);
4649 0 : if (unlikely(ret))
4650 0 : goto out_handle;
4651 : /* Zero out partial block at the edges of the range */
4652 0 : ret = ext4_zero_partial_blocks(handle, inode, offset, len);
4653 0 : if (ret >= 0)
4654 0 : ext4_update_inode_fsync_trans(handle, inode, 1);
4655 :
4656 0 : if (file->f_flags & O_SYNC)
4657 0 : ext4_handle_sync(handle);
4658 :
4659 0 : out_handle:
4660 0 : ext4_journal_stop(handle);
4661 0 : out_mutex:
4662 0 : inode_unlock(inode);
4663 0 : return ret;
4664 : }
4665 :
4666 : /*
4667 : * preallocate space for a file. This implements ext4's fallocate file
4668                 :  * operation, which gets called from the sys_fallocate system call.
4669                 :  * For block-mapped files, posix_fallocate should fall back to the method
4670                 :  * of writing zeroes to the required new blocks (the same behavior that is
4671                 :  * expected of file systems which do not support the fallocate() system call).
4672 : */
4673 0 : long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4674 : {
4675 0 : struct inode *inode = file_inode(file);
4676 0 : loff_t new_size = 0;
4677 0 : unsigned int max_blocks;
4678 0 : int ret = 0;
4679 0 : int flags;
4680 0 : ext4_lblk_t lblk;
4681 0 : unsigned int blkbits = inode->i_blkbits;
4682 :
4683 : /*
4684 : * Encrypted inodes can't handle collapse range or insert
4685 : * range since we would need to re-encrypt blocks with a
4686 : * different IV or XTS tweak (which are based on the logical
4687 : * block number).
4688 : */
4689 0 : if (IS_ENCRYPTED(inode) &&
4690 0 : (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
4691 : return -EOPNOTSUPP;
4692 :
4693 : /* Return error if mode is not supported */
4694 0 : if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
4695 : FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
4696 : FALLOC_FL_INSERT_RANGE))
4697 : return -EOPNOTSUPP;
4698 :
4699 0 : inode_lock(inode);
4700 0 : ret = ext4_convert_inline_data(inode);
4701 0 : inode_unlock(inode);
4702 0 : if (ret)
4703 0 : goto exit;
4704 :
4705 0 : if (mode & FALLOC_FL_PUNCH_HOLE) {
4706 0 : ret = ext4_punch_hole(file, offset, len);
4707 0 : goto exit;
4708 : }
4709 :
4710 0 : if (mode & FALLOC_FL_COLLAPSE_RANGE) {
4711 0 : ret = ext4_collapse_range(file, offset, len);
4712 0 : goto exit;
4713 : }
4714 :
4715 0 : if (mode & FALLOC_FL_INSERT_RANGE) {
4716 0 : ret = ext4_insert_range(file, offset, len);
4717 0 : goto exit;
4718 : }
4719 :
4720 0 : if (mode & FALLOC_FL_ZERO_RANGE) {
4721 0 : ret = ext4_zero_range(file, offset, len, mode);
4722 0 : goto exit;
4723 : }
4724 0 : trace_ext4_fallocate_enter(inode, offset, len, mode);
4725 0 : lblk = offset >> blkbits;
4726 :
4727 0 : max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
4728 0 : flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
4729 :
4730 0 : inode_lock(inode);
4731 :
4732 : /*
4733                 :          * We only support preallocation for extent-based files
4734 : */
4735 0 : if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4736 0 : ret = -EOPNOTSUPP;
4737 0 : goto out;
4738 : }
4739 :
4740 0 : if (!(mode & FALLOC_FL_KEEP_SIZE) &&
4741 0 : (offset + len > inode->i_size ||
4742 0 : offset + len > EXT4_I(inode)->i_disksize)) {
4743 0 : new_size = offset + len;
4744 0 : ret = inode_newsize_ok(inode, new_size);
4745 0 : if (ret)
4746 0 : goto out;
4747 : }
4748 :
4749                 :         /* Wait for all existing dio workers; newcomers will block on i_rwsem */
4750 0 : inode_dio_wait(inode);
4751 :
4752 0 : ret = file_modified(file);
4753 0 : if (ret)
4754 0 : goto out;
4755 :
4756 0 : ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
4757 0 : if (ret)
4758 0 : goto out;
4759 :
4760 0 : if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
4761 0 : ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
4762 0 : EXT4_I(inode)->i_sync_tid);
4763 : }
4764 0 : out:
4765 0 : inode_unlock(inode);
4766 0 : trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
4767 0 : exit:
4768 0 : return ret;
4769 : }
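/*
 * A user-space sketch (not part of extents.c) of the mode bits dispatched by
 * ext4_fallocate() above: FALLOC_FL_PUNCH_HOLE must be combined with
 * FALLOC_FL_KEEP_SIZE, and FALLOC_FL_ZERO_RANGE is routed to
 * ext4_zero_range(). The file name, offsets and lengths are arbitrary; it
 * assumes a glibc recent enough to expose the FALLOC_FL_* flags via fcntl.h.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	int fd = open("testfile", O_CREAT | O_RDWR, 0644);

	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}
	/* Make the file 4 MiB long (plain preallocation updates i_size). */
	if (fallocate(fd, 0, 0, 4 << 20) < 0) {
		perror("fallocate");
		return EXIT_FAILURE;
	}
	/* Deallocate 64 KiB at offset 1 MiB, leaving a hole; i_size unchanged. */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      1 << 20, 64 << 10) < 0) {
		perror("punch hole");
		return EXIT_FAILURE;
	}
	/* Zero 64 KiB at offset 2 MiB; aligned blocks become unwritten extents. */
	if (fallocate(fd, FALLOC_FL_ZERO_RANGE, 2 << 20, 64 << 10) < 0) {
		perror("zero range");
		return EXIT_FAILURE;
	}
	fsync(fd);
	close(fd);
	return 0;
}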
4770 :
4771 : /*
4772                 :  * This function converts a range of blocks to written extents.
4773                 :  * The caller of this function will pass the start offset and the size;
4774                 :  * all unwritten extents within this range will be converted to
4775 : * written extents.
4776 : *
4777                 :  * This function is called from the direct IO end_io callback
4778                 :  * function to convert the fallocated extents after IO is completed.
4779 : * Returns 0 on success.
4780 : */
4781 0 : int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
4782 : loff_t offset, ssize_t len)
4783 : {
4784 0 : unsigned int max_blocks;
4785 0 : int ret = 0, ret2 = 0, ret3 = 0;
4786 0 : struct ext4_map_blocks map;
4787 0 : unsigned int blkbits = inode->i_blkbits;
4788 0 : unsigned int credits = 0;
4789 :
4790 0 : map.m_lblk = offset >> blkbits;
4791 0 : max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
4792 :
4793 0 : if (!handle) {
4794 : /*
4795 : * credits to insert 1 extent into extent tree
4796 : */
4797 0 : credits = ext4_chunk_trans_blocks(inode, max_blocks);
4798 : }
4799 0 : while (ret >= 0 && ret < max_blocks) {
4800 0 : map.m_lblk += ret;
4801 0 : map.m_len = (max_blocks -= ret);
4802 0 : if (credits) {
4803 0 : handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4804 : credits);
4805 0 : if (IS_ERR(handle)) {
4806 0 : ret = PTR_ERR(handle);
4807 0 : break;
4808 : }
4809 : }
4810 0 : ret = ext4_map_blocks(handle, inode, &map,
4811 : EXT4_GET_BLOCKS_IO_CONVERT_EXT);
4812 0 : if (ret <= 0)
4813 0 : ext4_warning(inode->i_sb,
4814 : "inode #%lu: block %u: len %u: "
4815 : "ext4_ext_map_blocks returned %d",
4816 : inode->i_ino, map.m_lblk,
4817 : map.m_len, ret);
4818 0 : ret2 = ext4_mark_inode_dirty(handle, inode);
4819 0 : if (credits) {
4820 0 : ret3 = ext4_journal_stop(handle);
4821 0 : if (unlikely(ret3))
4822 0 : ret2 = ret3;
4823 : }
4824 :
4825 0 : if (ret <= 0 || ret2)
4826 : break;
4827 : }
4828 0 : return ret > 0 ? ret2 : ret;
4829 : }
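/*
 * A user-space sketch (not part of extents.c) of a direct I/O write into
 * preallocated space, the case whose completion path ends up in
 * ext4_convert_unwritten_extents() above. The 4096-byte alignment is an
 * assumption about the device's logical block size; file name and sizes are
 * arbitrary.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	void *buf;
	int fd = open("dio-testfile", O_CREAT | O_RDWR | O_DIRECT, 0644);

	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}
	/* Preallocate unwritten extents covering the range we will write. */
	if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20) < 0) {
		perror("fallocate");
		return EXIT_FAILURE;
	}
	/* O_DIRECT requires an aligned buffer, offset and length. */
	if (posix_memalign(&buf, 4096, 64 << 10)) {
		fprintf(stderr, "posix_memalign failed\n");
		return EXIT_FAILURE;
	}
	memset(buf, 0x5a, 64 << 10);
	if (pwrite(fd, buf, 64 << 10, 0) < 0) {
		perror("pwrite");
		return EXIT_FAILURE;
	}
	/*
	 * When this I/O completes, the end_io callback converts the filled
	 * part of the unwritten extent to written.
	 */
	free(buf);
	close(fd);
	return 0;
}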
4830 :
4831 0 : int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end)
4832 : {
4833 0 : int ret = 0, err = 0;
4834 0 : struct ext4_io_end_vec *io_end_vec;
4835 :
4836 : /*
4837                 :          * This is somewhat ugly but the idea is clear: when a transaction is
4838                 :          * reserved, everything goes into it. Otherwise we start several
4839                 :          * smaller transactions to convert each extent separately.
4840 : */
4841 0 : if (handle) {
4842 0 : handle = ext4_journal_start_reserved(handle,
4843 : EXT4_HT_EXT_CONVERT);
4844 0 : if (IS_ERR(handle))
4845 0 : return PTR_ERR(handle);
4846 : }
4847 :
4848 0 : list_for_each_entry(io_end_vec, &io_end->list_vec, list) {
4849 0 : ret = ext4_convert_unwritten_extents(handle, io_end->inode,
4850 : io_end_vec->offset,
4851 : io_end_vec->size);
4852 0 : if (ret)
4853 : break;
4854 : }
4855 :
4856 0 : if (handle)
4857 0 : err = ext4_journal_stop(handle);
4858 :
4859 0 : return ret < 0 ? ret : err;
4860 : }
4861 :
4862 0 : static int ext4_iomap_xattr_fiemap(struct inode *inode, struct iomap *iomap)
4863 : {
4864 0 : __u64 physical = 0;
4865 0 : __u64 length = 0;
4866 0 : int blockbits = inode->i_sb->s_blocksize_bits;
4867 0 : int error = 0;
4868 0 : u16 iomap_type;
4869 :
4870 : /* in-inode? */
4871 0 : if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
4872 0 : struct ext4_iloc iloc;
4873 0 : int offset; /* offset of xattr in inode */
4874 :
4875 0 : error = ext4_get_inode_loc(inode, &iloc);
4876 0 : if (error)
4877 0 : return error;
4878 0 : physical = (__u64)iloc.bh->b_blocknr << blockbits;
4879 0 : offset = EXT4_GOOD_OLD_INODE_SIZE +
4880 0 : EXT4_I(inode)->i_extra_isize;
4881 0 : physical += offset;
4882 0 : length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
4883 0 : brelse(iloc.bh);
4884 0 : iomap_type = IOMAP_INLINE;
4885 0 : } else if (EXT4_I(inode)->i_file_acl) { /* external block */
4886 0 : physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits;
4887 0 : length = inode->i_sb->s_blocksize;
4888 0 : iomap_type = IOMAP_MAPPED;
4889 : } else {
4890 : /* no in-inode or external block for xattr, so return -ENOENT */
4891 0 : error = -ENOENT;
4892 0 : goto out;
4893 : }
4894 :
4895 0 : iomap->addr = physical;
4896 0 : iomap->offset = 0;
4897 0 : iomap->length = length;
4898 0 : iomap->type = iomap_type;
4899 0 : iomap->flags = 0;
4900 : out:
4901 : return error;
4902 : }
4903 :
4904 0 : static int ext4_iomap_xattr_begin(struct inode *inode, loff_t offset,
4905 : loff_t length, unsigned flags,
4906 : struct iomap *iomap, struct iomap *srcmap)
4907 : {
4908 0 : int error;
4909 :
4910 0 : error = ext4_iomap_xattr_fiemap(inode, iomap);
4911 0 : if (error == 0 && (offset >= iomap->length))
4912 0 : error = -ENOENT;
4913 0 : return error;
4914 : }
4915 :
4916 : static const struct iomap_ops ext4_iomap_xattr_ops = {
4917 : .iomap_begin = ext4_iomap_xattr_begin,
4918 : };
4919 :
4920 0 : static int ext4_fiemap_check_ranges(struct inode *inode, u64 start, u64 *len)
4921 : {
4922 0 : u64 maxbytes;
4923 :
4924 0 : if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
4925 0 : maxbytes = inode->i_sb->s_maxbytes;
4926 : else
4927 0 : maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
4928 :
4929 0 : if (*len == 0)
4930 : return -EINVAL;
4931 0 : if (start > maxbytes)
4932 : return -EFBIG;
4933 :
4934 : /*
4935 : * Shrink request scope to what the fs can actually handle.
4936 : */
4937 0 : if (*len > maxbytes || (maxbytes - *len) < start)
4938 0 : *len = maxbytes - start;
4939 : return 0;
4940 : }
4941 :
4942 0 : int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4943 : u64 start, u64 len)
4944 : {
4945 0 : int error = 0;
4946 :
4947 0 : if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
4948 0 : error = ext4_ext_precache(inode);
4949 0 : if (error)
4950 : return error;
4951 0 : fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
4952 : }
4953 :
4954 : /*
4955 : * For bitmap files the maximum size limit could be smaller than
4956 : * s_maxbytes, so check len here manually instead of just relying on the
4957 : * generic check.
4958 : */
4959 0 : error = ext4_fiemap_check_ranges(inode, start, &len);
4960 0 : if (error)
4961 : return error;
4962 :
4963 0 : if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
4964 0 : fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR;
4965 0 : return iomap_fiemap(inode, fieinfo, start, len,
4966 : &ext4_iomap_xattr_ops);
4967 : }
4968 :
4969 0 : return iomap_fiemap(inode, fieinfo, start, len, &ext4_iomap_report_ops);
4970 : }
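/*
 * A user-space sketch (not part of extents.c) that queries the extent layout
 * ext4_fiemap() reports, via the FS_IOC_FIEMAP ioctl. Requesting 32 extents
 * in one call is an arbitrary choice; larger files may need to loop with an
 * advancing fm_start.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	struct fiemap *fm;
	unsigned int i, nr = 32;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return EXIT_FAILURE;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}
	fm = calloc(1, sizeof(*fm) + nr * sizeof(struct fiemap_extent));
	if (!fm)
		return EXIT_FAILURE;
	fm->fm_start = 0;
	fm->fm_length = FIEMAP_MAX_OFFSET;	/* whole file */
	fm->fm_flags = FIEMAP_FLAG_SYNC;	/* flush dirty data first */
	fm->fm_extent_count = nr;

	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
		perror("FS_IOC_FIEMAP");
		return EXIT_FAILURE;
	}
	for (i = 0; i < fm->fm_mapped_extents; i++) {
		struct fiemap_extent *fe = &fm->fm_extents[i];

		printf("logical %llu physical %llu len %llu%s\n",
		       (unsigned long long)fe->fe_logical,
		       (unsigned long long)fe->fe_physical,
		       (unsigned long long)fe->fe_length,
		       (fe->fe_flags & FIEMAP_EXTENT_UNWRITTEN) ?
			       " (unwritten)" : "");
	}
	free(fm);
	close(fd);
	return 0;
}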
4971 :
4972 0 : int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo,
4973 : __u64 start, __u64 len)
4974 : {
4975 0 : ext4_lblk_t start_blk, len_blks;
4976 0 : __u64 last_blk;
4977 0 : int error = 0;
4978 :
4979 0 : if (ext4_has_inline_data(inode)) {
4980 0 : int has_inline;
4981 :
4982 0 : down_read(&EXT4_I(inode)->xattr_sem);
4983 0 : has_inline = ext4_has_inline_data(inode);
4984 0 : up_read(&EXT4_I(inode)->xattr_sem);
4985 0 : if (has_inline)
4986 : return 0;
4987 : }
4988 :
4989 0 : if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
4990 0 : error = ext4_ext_precache(inode);
4991 0 : if (error)
4992 : return error;
4993 0 : fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
4994 : }
4995 :
4996 0 : error = fiemap_prep(inode, fieinfo, start, &len, 0);
4997 0 : if (error)
4998 : return error;
4999 :
5000 0 : error = ext4_fiemap_check_ranges(inode, start, &len);
5001 0 : if (error)
5002 : return error;
5003 :
5004 0 : start_blk = start >> inode->i_sb->s_blocksize_bits;
5005 0 : last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
5006 0 : if (last_blk >= EXT_MAX_BLOCKS)
5007 : last_blk = EXT_MAX_BLOCKS-1;
5008 0 : len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
5009 :
5010 : /*
5011 : * Walk the extent tree gathering extent information
5012 : * and pushing extents back to the user.
5013 : */
5014 0 : return ext4_fill_es_cache_info(inode, start_blk, len_blks, fieinfo);
5015 : }
5016 :
5017 : /*
5018 : * ext4_ext_shift_path_extents:
5019 : * Shift the extents of a path structure lying between path[depth].p_ext
5020 : * and EXT_LAST_EXTENT(path[depth].p_hdr), by @shift blocks. @SHIFT tells
5021 : * if it is right shift or left shift operation.
5022                 :  * whether it is a right or left shift operation.
5023 : static int
5024 0 : ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
5025 : struct inode *inode, handle_t *handle,
5026 : enum SHIFT_DIRECTION SHIFT)
5027 : {
5028 0 : int depth, err = 0;
5029 0 : struct ext4_extent *ex_start, *ex_last;
5030 0 : bool update = false;
5031 0 : int credits, restart_credits;
5032 0 : depth = path->p_depth;
5033 :
5034 0 : while (depth >= 0) {
5035 0 : if (depth == path->p_depth) {
5036 0 : ex_start = path[depth].p_ext;
5037 0 : if (!ex_start)
5038 : return -EFSCORRUPTED;
5039 :
5040 0 : ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
5041 : /* leaf + sb + inode */
5042 0 : credits = 3;
5043 0 : if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) {
5044 0 : update = true;
5045 : /* extent tree + sb + inode */
5046 0 : credits = depth + 2;
5047 : }
5048 :
5049 0 : restart_credits = ext4_writepage_trans_blocks(inode);
5050 0 : err = ext4_datasem_ensure_credits(handle, inode, credits,
5051 : restart_credits, 0);
5052 0 : if (err) {
5053 0 : if (err > 0)
5054 0 : err = -EAGAIN;
5055 0 : goto out;
5056 : }
5057 :
5058 0 : err = ext4_ext_get_access(handle, inode, path + depth);
5059 0 : if (err)
5060 0 : goto out;
5061 :
5062 0 : while (ex_start <= ex_last) {
5063 0 : if (SHIFT == SHIFT_LEFT) {
5064 0 : le32_add_cpu(&ex_start->ee_block,
5065 : -shift);
5066 : /* Try to merge to the left. */
5067 0 : if ((ex_start >
5068 0 : EXT_FIRST_EXTENT(path[depth].p_hdr))
5069 0 : &&
5070 0 : ext4_ext_try_to_merge_right(inode,
5071 : path, ex_start - 1))
5072 0 : ex_last--;
5073 : else
5074 0 : ex_start++;
5075 : } else {
5076 0 : le32_add_cpu(&ex_last->ee_block, shift);
5077 0 : ext4_ext_try_to_merge_right(inode, path,
5078 : ex_last);
5079 0 : ex_last--;
5080 : }
5081 : }
5082 0 : err = ext4_ext_dirty(handle, inode, path + depth);
5083 0 : if (err)
5084 0 : goto out;
5085 :
5086 0 : if (--depth < 0 || !update)
5087 : break;
5088 : }
5089 :
5090 : /* Update index too */
5091 0 : err = ext4_ext_get_access(handle, inode, path + depth);
5092 0 : if (err)
5093 0 : goto out;
5094 :
5095 0 : if (SHIFT == SHIFT_LEFT)
5096 0 : le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
5097 : else
5098 0 : le32_add_cpu(&path[depth].p_idx->ei_block, shift);
5099 0 : err = ext4_ext_dirty(handle, inode, path + depth);
5100 0 : if (err)
5101 0 : goto out;
5102 :
5103 : /* We are done if the current index is not the first index */
5104 0 : if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr))
5105 : break;
5106 :
5107 0 : depth--;
5108 : }
5109 :
5110 0 : out:
5111 : return err;
5112 : }
5113 :
5114 : /*
5115 : * ext4_ext_shift_extents:
5116 : * All the extents which lie in the range from @start to the last allocated
5117 : * block of the @inode are shifted either towards the left or the right
5118 : * (depending upon @SHIFT) by @shift blocks.
5119 : * Returns 0 on success, error otherwise.
5120 : */
5121 : static int
5122 0 : ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5123 : ext4_lblk_t start, ext4_lblk_t shift,
5124 : enum SHIFT_DIRECTION SHIFT)
5125 : {
5126 0 : struct ext4_ext_path *path;
5127 0 : int ret = 0, depth;
5128 0 : struct ext4_extent *extent;
5129 0 : ext4_lblk_t stop, *iterator, ex_start, ex_end;
5130 0 : ext4_lblk_t tmp = EXT_MAX_BLOCKS;
5131 :
5132 : /* Let path point to the last extent */
5133 0 : path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
5134 : EXT4_EX_NOCACHE);
5135 0 : if (IS_ERR(path))
5136 0 : return PTR_ERR(path);
5137 :
5138 0 : depth = path->p_depth;
5139 0 : extent = path[depth].p_ext;
5140 0 : if (!extent)
5141 0 : goto out;
5142 :
5143 0 : stop = le32_to_cpu(extent->ee_block);
5144 :
5145 : /*
5146 : * For left shifts, make sure the hole on the left is big enough to
5147 : * accommodate the shift. For right shifts, make sure the last extent
5148 : * won't be shifted beyond EXT_MAX_BLOCKS.
5149 : */
5150 0 : if (SHIFT == SHIFT_LEFT) {
5151 0 : path = ext4_find_extent(inode, start - 1, &path,
5152 : EXT4_EX_NOCACHE);
5153 0 : if (IS_ERR(path))
5154 0 : return PTR_ERR(path);
5155 0 : depth = path->p_depth;
5156 0 : extent = path[depth].p_ext;
5157 0 : if (extent) {
5158 0 : ex_start = le32_to_cpu(extent->ee_block);
5159 0 : ex_end = le32_to_cpu(extent->ee_block) +
5160 0 : ext4_ext_get_actual_len(extent);
5161 : } else {
5162 : ex_start = 0;
5163 : ex_end = 0;
5164 : }
5165 :
5166 0 : if ((start == ex_start && shift > ex_start) ||
5167 0 : (shift > start - ex_end)) {
5168 0 : ret = -EINVAL;
5169 0 : goto out;
5170 : }
5171 : } else {
5172 0 : if (shift > EXT_MAX_BLOCKS -
5173 0 : (stop + ext4_ext_get_actual_len(extent))) {
5174 0 : ret = -EINVAL;
5175 0 : goto out;
5176 : }
5177 : }
5178 :
5179 : /*
5180 : * In case of a left shift, the iterator points to start and is increased
5181 : * until we reach stop. In case of a right shift, the iterator points to
5182 : * stop and is decreased until we reach start.
5183 : */
5184 0 : again:
5185 0 : ret = 0;
5186 0 : if (SHIFT == SHIFT_LEFT)
5187 : iterator = &start;
5188 : else
5189 0 : iterator = &stop;
5190 :
5191 0 : if (tmp != EXT_MAX_BLOCKS)
5192 0 : *iterator = tmp;
5193 :
5194 : /*
5195 : * It's safe to start updating extents. Start and stop are unsigned, so
5196 : * in case of a right shift, if an extent with block 0 is reached, the
5197 : * iterator becomes NULL to indicate the end of the loop.
5198 : */
5199 0 : while (iterator && start <= stop) {
5200 0 : path = ext4_find_extent(inode, *iterator, &path,
5201 : EXT4_EX_NOCACHE);
5202 0 : if (IS_ERR(path))
5203 0 : return PTR_ERR(path);
5204 0 : depth = path->p_depth;
5205 0 : extent = path[depth].p_ext;
5206 0 : if (!extent) {
5207 0 : EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
5208 : (unsigned long) *iterator);
5209 0 : return -EFSCORRUPTED;
5210 : }
5211 0 : if (SHIFT == SHIFT_LEFT && *iterator >
5212 0 : le32_to_cpu(extent->ee_block)) {
5213 : /* Hole, move to the next extent */
5214 0 : if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
5215 0 : path[depth].p_ext++;
5216 : } else {
5217 0 : *iterator = ext4_ext_next_allocated_block(path);
5218 0 : continue;
5219 : }
5220 : }
5221 :
5222 0 : tmp = *iterator;
5223 0 : if (SHIFT == SHIFT_LEFT) {
5224 0 : extent = EXT_LAST_EXTENT(path[depth].p_hdr);
5225 0 : *iterator = le32_to_cpu(extent->ee_block) +
5226 0 : ext4_ext_get_actual_len(extent);
5227 : } else {
5228 0 : extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
5229 0 : if (le32_to_cpu(extent->ee_block) > start)
5230 0 : *iterator = le32_to_cpu(extent->ee_block) - 1;
5231 0 : else if (le32_to_cpu(extent->ee_block) == start)
5232 : iterator = NULL;
5233 : else {
5234 0 : extent = EXT_LAST_EXTENT(path[depth].p_hdr);
5235 0 : while (le32_to_cpu(extent->ee_block) >= start)
5236 0 : extent--;
5237 :
5238 0 : if (extent == EXT_LAST_EXTENT(path[depth].p_hdr))
5239 : break;
5240 :
5241 0 : extent++;
5242 0 : iterator = NULL;
5243 : }
5244 0 : path[depth].p_ext = extent;
5245 : }
5246 0 : ret = ext4_ext_shift_path_extents(path, shift, inode,
5247 : handle, SHIFT);
5248 : /* iterator can be NULL which means we should break */
5249 0 : if (ret == -EAGAIN)
5250 0 : goto again;
5251 0 : if (ret)
5252 : break;
5253 : }
5254 0 : out:
5255 0 : ext4_free_ext_path(path);
5256 0 : return ret;
5257 : }
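/*
 * Worked example of the remapping performed above (illustrative only; the
 * helper below is not used by this file). With a left shift of @shift
 * blocks, every affected extent has its ee_block reduced by @shift; a
 * right shift adds @shift instead. E.g. collapsing blocks [4, 8) of a
 * file moves block 8 to 4, block 9 to 5, and so on.
 */
static inline ext4_lblk_t example_shifted_block(ext4_lblk_t lblk,
						ext4_lblk_t shift,
						enum SHIFT_DIRECTION dir)
{
	return dir == SHIFT_LEFT ? lblk - shift : lblk + shift;
}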
5258 :
5259 : /*
5260 : * ext4_collapse_range:
5261 : * This implements fallocate's collapse range functionality for ext4.
5262 : * Returns 0 on success and a negative error code on failure.
5263 : */
5264 0 : static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
5265 : {
5266 0 : struct inode *inode = file_inode(file);
5267 0 : struct super_block *sb = inode->i_sb;
5268 0 : struct address_space *mapping = inode->i_mapping;
5269 0 : ext4_lblk_t punch_start, punch_stop;
5270 0 : handle_t *handle;
5271 0 : unsigned int credits;
5272 0 : loff_t new_size, ioffset;
5273 0 : int ret;
5274 :
5275 : /*
5276 : * We need to test this early because xfstests assumes that a
5277 : * collapse range of (0, 1) will return EOPNOTSUPP if the file
5278 : * system does not support collapse range.
5279 : */
5280 0 : if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
5281 : return -EOPNOTSUPP;
5282 :
5283 : /* Collapse range works only on fs cluster size aligned regions. */
5284 0 : if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
5285 : return -EINVAL;
5286 :
5287 0 : trace_ext4_collapse_range(inode, offset, len);
5288 :
5289 0 : punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
5290 0 : punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
5291 :
5292 0 : inode_lock(inode);
5293 : /*
5294 : * There is no need to allow the collapse range to overlap EOF; in that
5295 : * case it is effectively a truncate operation.
5296 : */
5297 0 : if (offset + len >= inode->i_size) {
5298 0 : ret = -EINVAL;
5299 0 : goto out_mutex;
5300 : }
5301 :
5302 : /* Currently just for extent based files */
5303 0 : if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
5304 0 : ret = -EOPNOTSUPP;
5305 0 : goto out_mutex;
5306 : }
5307 :
5308 : /* Wait for existing dio to complete */
5309 0 : inode_dio_wait(inode);
5310 :
5311 0 : ret = file_modified(file);
5312 0 : if (ret)
5313 0 : goto out_mutex;
5314 :
5315 : /*
5316 : * Prevent page faults from reinstantiating pages we have released from
5317 : * page cache.
5318 : */
5319 0 : filemap_invalidate_lock(mapping);
5320 :
5321 0 : ret = ext4_break_layouts(inode);
5322 0 : if (ret)
5323 0 : goto out_mmap;
5324 :
5325 : /*
5326 : * Need to round down offset to be aligned with page size boundary
5327 : * for page size > block size.
5328 : */
5329 0 : ioffset = round_down(offset, PAGE_SIZE);
5330 : /*
5331 : * Write tail of the last page before removed range since it will get
5332 : * removed from the page cache below.
5333 : */
5334 0 : ret = filemap_write_and_wait_range(mapping, ioffset, offset);
5335 0 : if (ret)
5336 0 : goto out_mmap;
5337 : /*
5338 : * Write out the data that will be shifted, to preserve it when discarding
5339 : * the page cache below. We are also protected from pages becoming dirty
5340 : * by i_rwsem and invalidate_lock.
5341 : */
5342 0 : ret = filemap_write_and_wait_range(mapping, offset + len,
5343 : LLONG_MAX);
5344 0 : if (ret)
5345 0 : goto out_mmap;
5346 0 : truncate_pagecache(inode, ioffset);
5347 :
5348 0 : credits = ext4_writepage_trans_blocks(inode);
5349 0 : handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
5350 0 : if (IS_ERR(handle)) {
5351 0 : ret = PTR_ERR(handle);
5352 0 : goto out_mmap;
5353 : }
5354 0 : ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
5355 :
5356 0 : down_write(&EXT4_I(inode)->i_data_sem);
5357 0 : ext4_discard_preallocations(inode, 0);
5358 0 : ext4_es_remove_extent(inode, punch_start, EXT_MAX_BLOCKS - punch_start);
5359 :
5360 0 : ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
5361 0 : if (ret) {
5362 0 : up_write(&EXT4_I(inode)->i_data_sem);
5363 0 : goto out_stop;
5364 : }
5365 0 : ext4_discard_preallocations(inode, 0);
5366 :
5367 0 : ret = ext4_ext_shift_extents(inode, handle, punch_stop,
5368 : punch_stop - punch_start, SHIFT_LEFT);
5369 0 : if (ret) {
5370 0 : up_write(&EXT4_I(inode)->i_data_sem);
5371 0 : goto out_stop;
5372 : }
5373 :
5374 0 : new_size = inode->i_size - len;
5375 0 : i_size_write(inode, new_size);
5376 0 : EXT4_I(inode)->i_disksize = new_size;
5377 :
5378 0 : up_write(&EXT4_I(inode)->i_data_sem);
5379 0 : if (IS_SYNC(inode))
5380 0 : ext4_handle_sync(handle);
5381 0 : inode->i_mtime = inode->i_ctime = current_time(inode);
5382 0 : ret = ext4_mark_inode_dirty(handle, inode);
5383 0 : ext4_update_inode_fsync_trans(handle, inode, 1);
5384 :
5385 0 : out_stop:
5386 0 : ext4_journal_stop(handle);
5387 0 : out_mmap:
5388 0 : filemap_invalidate_unlock(mapping);
5389 0 : out_mutex:
5390 0 : inode_unlock(inode);
5391 0 : return ret;
5392 : }
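/*
 * Usage sketch (userspace, illustrative only): collapse range is driven
 * via fallocate(2) with FALLOC_FL_COLLAPSE_RANGE. As checked above,
 * offset and len must be aligned to the filesystem cluster size and must
 * not reach EOF. The values below are arbitrary examples assuming a 4KiB
 * block/cluster size.
 */
#if 0	/* example only, not built as part of this file */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>

static int collapse_example(int fd)
{
	/* remove bytes [4096, 12288) and shift the rest of the file down */
	if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 4096, 8192) < 0) {
		perror("fallocate(FALLOC_FL_COLLAPSE_RANGE)");
		return -1;
	}
	return 0;
}
#endif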
5393 :
5394 : /*
5395 : * ext4_insert_range:
5396 : * This function implements the FALLOC_FL_INSERT_RANGE flag of fallocate.
5397 : * The data blocks starting from @offset up to EOF are shifted by @len
5398 : * towards the right to create a hole in the @inode. The inode size is
5399 : * increased by @len bytes.
5400 : * Returns 0 on success, error otherwise.
5401 : */
5402 0 : static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
5403 : {
5404 0 : struct inode *inode = file_inode(file);
5405 0 : struct super_block *sb = inode->i_sb;
5406 0 : struct address_space *mapping = inode->i_mapping;
5407 0 : handle_t *handle;
5408 0 : struct ext4_ext_path *path;
5409 0 : struct ext4_extent *extent;
5410 0 : ext4_lblk_t offset_lblk, len_lblk, ee_start_lblk = 0;
5411 0 : unsigned int credits, ee_len;
5412 0 : int ret = 0, depth, split_flag = 0;
5413 0 : loff_t ioffset;
5414 :
5415 : /*
5416 : * We need to test this early because xfstests assumes that an
5417 : * insert range of (0, 1) will return EOPNOTSUPP if the file
5418 : * system does not support insert range.
5419 : */
5420 0 : if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
5421 : return -EOPNOTSUPP;
5422 :
5423 : /* Insert range works only on fs cluster size aligned regions. */
5424 0 : if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
5425 : return -EINVAL;
5426 :
5427 0 : trace_ext4_insert_range(inode, offset, len);
5428 :
5429 0 : offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb);
5430 0 : len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb);
5431 :
5432 0 : inode_lock(inode);
5433 : /* Currently just for extent based files */
5434 0 : if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
5435 0 : ret = -EOPNOTSUPP;
5436 0 : goto out_mutex;
5437 : }
5438 :
5439 : /* Check whether the maximum file size would be exceeded */
5440 0 : if (len > inode->i_sb->s_maxbytes - inode->i_size) {
5441 0 : ret = -EFBIG;
5442 0 : goto out_mutex;
5443 : }
5444 :
5445 : /* Offset must be less than i_size */
5446 0 : if (offset >= inode->i_size) {
5447 0 : ret = -EINVAL;
5448 0 : goto out_mutex;
5449 : }
5450 :
5451 : /* Wait for existing dio to complete */
5452 0 : inode_dio_wait(inode);
5453 :
5454 0 : ret = file_modified(file);
5455 0 : if (ret)
5456 0 : goto out_mutex;
5457 :
5458 : /*
5459 : * Prevent page faults from reinstantiating pages we have released from
5460 : * page cache.
5461 : */
5462 0 : filemap_invalidate_lock(mapping);
5463 :
5464 0 : ret = ext4_break_layouts(inode);
5465 0 : if (ret)
5466 0 : goto out_mmap;
5467 :
5468 : /*
5469 : * Need to round down to align start offset to page size boundary
5470 : * for page size > block size.
5471 : */
5472 0 : ioffset = round_down(offset, PAGE_SIZE);
5473 : /* Write out all dirty pages */
5474 0 : ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
5475 : LLONG_MAX);
5476 0 : if (ret)
5477 0 : goto out_mmap;
5478 0 : truncate_pagecache(inode, ioffset);
5479 :
5480 0 : credits = ext4_writepage_trans_blocks(inode);
5481 0 : handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
5482 0 : if (IS_ERR(handle)) {
5483 0 : ret = PTR_ERR(handle);
5484 0 : goto out_mmap;
5485 : }
5486 0 : ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
5487 :
5488 : /* Expand the file to avoid data loss if there is an error while shifting */
5489 0 : inode->i_size += len;
5490 0 : EXT4_I(inode)->i_disksize += len;
5491 0 : inode->i_mtime = inode->i_ctime = current_time(inode);
5492 0 : ret = ext4_mark_inode_dirty(handle, inode);
5493 0 : if (ret)
5494 0 : goto out_stop;
5495 :
5496 0 : down_write(&EXT4_I(inode)->i_data_sem);
5497 0 : ext4_discard_preallocations(inode, 0);
5498 :
5499 0 : path = ext4_find_extent(inode, offset_lblk, NULL, 0);
5500 0 : if (IS_ERR(path)) {
5501 0 : up_write(&EXT4_I(inode)->i_data_sem);
5502 0 : goto out_stop;
5503 : }
5504 :
5505 0 : depth = ext_depth(inode);
5506 0 : extent = path[depth].p_ext;
5507 0 : if (extent) {
5508 0 : ee_start_lblk = le32_to_cpu(extent->ee_block);
5509 0 : ee_len = ext4_ext_get_actual_len(extent);
5510 :
5511 : /*
5512 : * If offset_lblk is not the starting block of the extent, split
5513 : * the extent at @offset_lblk.
5514 : */
5515 0 : if ((offset_lblk > ee_start_lblk) &&
5516 0 : (offset_lblk < (ee_start_lblk + ee_len))) {
5517 0 : if (ext4_ext_is_unwritten(extent))
5518 0 : split_flag = EXT4_EXT_MARK_UNWRIT1 |
5519 : EXT4_EXT_MARK_UNWRIT2;
5520 0 : ret = ext4_split_extent_at(handle, inode, &path,
5521 : offset_lblk, split_flag,
5522 : EXT4_EX_NOCACHE |
5523 : EXT4_GET_BLOCKS_PRE_IO |
5524 : EXT4_GET_BLOCKS_METADATA_NOFAIL);
5525 : }
5526 :
5527 0 : ext4_free_ext_path(path);
5528 0 : if (ret < 0) {
5529 0 : up_write(&EXT4_I(inode)->i_data_sem);
5530 0 : goto out_stop;
5531 : }
5532 : } else {
5533 0 : ext4_free_ext_path(path);
5534 : }
5535 :
5536 0 : ext4_es_remove_extent(inode, offset_lblk, EXT_MAX_BLOCKS - offset_lblk);
5537 :
5538 : /*
5539 : * If offset_lblk lies in a hole which is at the start of the file, use
5540 : * ee_start_lblk to shift extents.
5541 : */
5542 0 : ret = ext4_ext_shift_extents(inode, handle,
5543 0 : max(ee_start_lblk, offset_lblk), len_lblk, SHIFT_RIGHT);
5544 :
5545 0 : up_write(&EXT4_I(inode)->i_data_sem);
5546 0 : if (IS_SYNC(inode))
5547 0 : ext4_handle_sync(handle);
5548 0 : if (ret >= 0)
5549 0 : ext4_update_inode_fsync_trans(handle, inode, 1);
5550 :
5551 0 : out_stop:
5552 0 : ext4_journal_stop(handle);
5553 0 : out_mmap:
5554 0 : filemap_invalidate_unlock(mapping);
5555 0 : out_mutex:
5556 0 : inode_unlock(inode);
5557 0 : return ret;
5558 : }
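/*
 * Usage sketch (userspace, illustrative only): insert range is driven via
 * fallocate(2) with FALLOC_FL_INSERT_RANGE. As checked above, offset and
 * len must be cluster-size aligned, offset must be below i_size, and the
 * resulting size must not exceed the filesystem maximum. Example values
 * assume a 4KiB block/cluster size.
 */
#if 0	/* example only, not built as part of this file */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>

static int insert_example(int fd)
{
	/* open an 8KiB hole at byte offset 4096; data beyond it moves right */
	if (fallocate(fd, FALLOC_FL_INSERT_RANGE, 4096, 8192) < 0) {
		perror("fallocate(FALLOC_FL_INSERT_RANGE)");
		return -1;
	}
	return 0;
}
#endif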
5559 :
5560 : /**
5561 : * ext4_swap_extents() - Swap extents between two inodes
5562 : * @handle: handle for this transaction
5563 : * @inode1: First inode
5564 : * @inode2: Second inode
5565 : * @lblk1: Start block for first inode
5566 : * @lblk2: Start block for second inode
5567 : * @count: Number of blocks to swap
5568 : * @unwritten: Mark second inode's extents as unwritten after swap
5569 : * @erp: Pointer to save error value
5570 : *
5571 : * This helper routine does exactly what it promises: "swap extents". All other
5572 : * work, such as page-cache locking consistency, bh mapping consistency or
5573 : * copying of the extents' data, must be performed by the caller.
5574 : * Locking:
5575 : * i_rwsem is held for both inodes
5576 : * i_data_sem is locked for write for both inodes
5577 : * Assumptions:
5578 : * All pages from requested range are locked for both inodes
5579 : */
5580 : int
5581 0 : ext4_swap_extents(handle_t *handle, struct inode *inode1,
5582 : struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
5583 : ext4_lblk_t count, int unwritten, int *erp)
5584 : {
5585 0 : struct ext4_ext_path *path1 = NULL;
5586 0 : struct ext4_ext_path *path2 = NULL;
5587 0 : int replaced_count = 0;
5588 :
5589 0 : BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
5590 0 : BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
5591 0 : BUG_ON(!inode_is_locked(inode1));
5592 0 : BUG_ON(!inode_is_locked(inode2));
5593 :
5594 0 : ext4_es_remove_extent(inode1, lblk1, count);
5595 0 : ext4_es_remove_extent(inode2, lblk2, count);
5596 :
5597 0 : while (count) {
5598 0 : struct ext4_extent *ex1, *ex2, tmp_ex;
5599 0 : ext4_lblk_t e1_blk, e2_blk;
5600 0 : int e1_len, e2_len, len;
5601 0 : int split = 0;
5602 :
5603 0 : path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE);
5604 0 : if (IS_ERR(path1)) {
5605 0 : *erp = PTR_ERR(path1);
5606 0 : path1 = NULL;
5607 0 : finish:
5608 0 : count = 0;
5609 0 : goto repeat;
5610 : }
5611 0 : path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE);
5612 0 : if (IS_ERR(path2)) {
5613 0 : *erp = PTR_ERR(path2);
5614 0 : path2 = NULL;
5615 0 : goto finish;
5616 : }
5617 0 : ex1 = path1[path1->p_depth].p_ext;
5618 0 : ex2 = path2[path2->p_depth].p_ext;
5619 : /* Do we have something to swap ? */
5620 0 : if (unlikely(!ex2 || !ex1))
5621 0 : goto finish;
5622 :
5623 0 : e1_blk = le32_to_cpu(ex1->ee_block);
5624 0 : e2_blk = le32_to_cpu(ex2->ee_block);
5625 0 : e1_len = ext4_ext_get_actual_len(ex1);
5626 0 : e2_len = ext4_ext_get_actual_len(ex2);
5627 :
5628 : /* Hole handling */
5629 0 : if (!in_range(lblk1, e1_blk, e1_len) ||
5630 0 : !in_range(lblk2, e2_blk, e2_len)) {
5631 0 : ext4_lblk_t next1, next2;
5632 :
5633 : /* if hole after extent, then go to next extent */
5634 0 : next1 = ext4_ext_next_allocated_block(path1);
5635 0 : next2 = ext4_ext_next_allocated_block(path2);
5636 : /* If hole before extent, then shift to that extent */
5637 0 : if (e1_blk > lblk1)
5638 0 : next1 = e1_blk;
5639 0 : if (e2_blk > lblk2)
5640 0 : next2 = e2_blk;
5641 : /* Do we have something to swap */
5642 0 : if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS)
5643 0 : goto finish;
5644 : /* Move to the rightest boundary */
5645 0 : len = next1 - lblk1;
5646 0 : if (len < next2 - lblk2)
5647 : len = next2 - lblk2;
5648 0 : if (len > count)
5649 0 : len = count;
5650 0 : lblk1 += len;
5651 0 : lblk2 += len;
5652 0 : count -= len;
5653 0 : goto repeat;
5654 : }
5655 :
5656 : /* Prepare left boundary */
5657 0 : if (e1_blk < lblk1) {
5658 0 : split = 1;
5659 0 : *erp = ext4_force_split_extent_at(handle, inode1,
5660 : &path1, lblk1, 0);
5661 0 : if (unlikely(*erp))
5662 0 : goto finish;
5663 : }
5664 0 : if (e2_blk < lblk2) {
5665 0 : split = 1;
5666 0 : *erp = ext4_force_split_extent_at(handle, inode2,
5667 : &path2, lblk2, 0);
5668 0 : if (unlikely(*erp))
5669 0 : goto finish;
5670 : }
5671 : /* ext4_split_extent_at() may result in leaf extent split,
5672 : * path must to be revalidated. */
5673 0 : if (split)
5674 0 : goto repeat;
5675 :
5676 : /* Prepare right boundary */
5677 0 : len = count;
5678 0 : if (len > e1_blk + e1_len - lblk1)
5679 : len = e1_blk + e1_len - lblk1;
5680 0 : if (len > e2_blk + e2_len - lblk2)
5681 0 : len = e2_blk + e2_len - lblk2;
5682 :
5683 0 : if (len != e1_len) {
5684 0 : split = 1;
5685 0 : *erp = ext4_force_split_extent_at(handle, inode1,
5686 : &path1, lblk1 + len, 0);
5687 0 : if (unlikely(*erp))
5688 0 : goto finish;
5689 : }
5690 0 : if (len != e2_len) {
5691 0 : split = 1;
5692 0 : *erp = ext4_force_split_extent_at(handle, inode2,
5693 : &path2, lblk2 + len, 0);
5694 0 : if (*erp)
5695 0 : goto finish;
5696 : }
5697 : /* ext4_split_extent_at() may result in leaf extent split,
5698 : * path must to be revalidated. */
5699 0 : if (split)
5700 0 : goto repeat;
5701 :
5702 0 : BUG_ON(e2_len != e1_len);
5703 0 : *erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth);
5704 0 : if (unlikely(*erp))
5705 0 : goto finish;
5706 0 : *erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth);
5707 0 : if (unlikely(*erp))
5708 0 : goto finish;
5709 :
5710 : /* Both extents are fully inside the boundaries. Swap them now */
5711 0 : tmp_ex = *ex1;
5712 0 : ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2));
5713 0 : ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex));
5714 0 : ex1->ee_len = cpu_to_le16(e2_len);
5715 0 : ex2->ee_len = cpu_to_le16(e1_len);
5716 0 : if (unwritten)
5717 0 : ext4_ext_mark_unwritten(ex2);
5718 0 : if (ext4_ext_is_unwritten(&tmp_ex))
5719 0 : ext4_ext_mark_unwritten(ex1);
5720 :
5721 0 : ext4_ext_try_to_merge(handle, inode2, path2, ex2);
5722 0 : ext4_ext_try_to_merge(handle, inode1, path1, ex1);
5723 0 : *erp = ext4_ext_dirty(handle, inode2, path2 +
5724 : path2->p_depth);
5725 0 : if (unlikely(*erp))
5726 0 : goto finish;
5727 0 : *erp = ext4_ext_dirty(handle, inode1, path1 +
5728 : path1->p_depth);
5729 : /*
5730 : * Looks scary, huh? The second inode already points to the new blocks,
5731 : * and it was successfully dirtied. But luckily an error can only happen
5732 : * here due to a journal error, so the full transaction will be
5733 : * aborted anyway.
5734 : */
5735 0 : if (unlikely(*erp))
5736 0 : goto finish;
5737 0 : lblk1 += len;
5738 0 : lblk2 += len;
5739 0 : replaced_count += len;
5740 0 : count -= len;
5741 :
5742 0 : repeat:
5743 0 : ext4_free_ext_path(path1);
5744 0 : ext4_free_ext_path(path2);
5745 0 : path1 = path2 = NULL;
5746 : }
5747 0 : return replaced_count;
5748 : }
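/*
 * Usage sketch (userspace, illustrative only): the usual way extents end
 * up being swapped is the online-defragmentation ioctl EXT4_IOC_MOVE_EXT
 * (as used by e4defrag), which moves blocks from an original file into a
 * donor file. The struct and ioctl number below are intended to mirror
 * the definitions in fs/ext4/ext4.h; values are arbitrary examples.
 */
#if 0	/* example only, not built as part of this file */
#include <sys/ioctl.h>
#include <linux/types.h>

struct move_extent {
	__u32 reserved;		/* should be zero */
	__u32 donor_fd;		/* donor file descriptor */
	__u64 orig_start;	/* logical start, in blocks, in the original file */
	__u64 donor_start;	/* logical start, in blocks, in the donor file */
	__u64 len;		/* block length to be moved */
	__u64 moved_len;	/* moved block length (output) */
};
#define EXT4_IOC_MOVE_EXT	_IOWR('f', 15, struct move_extent)

static long move_extent_example(int orig_fd, int donor_fd, __u64 blocks)
{
	struct move_extent me = {
		.donor_fd = donor_fd,
		.orig_start = 0,
		.donor_start = 0,
		.len = blocks,
	};

	return ioctl(orig_fd, EXT4_IOC_MOVE_EXT, &me);
}
#endif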
5749 :
5750 : /*
5751 : * ext4_clu_mapped - determine whether any block in a logical cluster has
5752 : * been mapped to a physical cluster
5753 : *
5754 : * @inode - file containing the logical cluster
5755 : * @lclu - logical cluster of interest
5756 : *
5757 : * Returns 1 if any block in the logical cluster is mapped, signifying
5758 : * that a physical cluster has been allocated for it. Otherwise,
5759 : * returns 0. Can also return negative error codes. Derived from
5760 : * ext4_ext_map_blocks().
5761 : */
5762 0 : int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu)
5763 : {
5764 0 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
5765 0 : struct ext4_ext_path *path;
5766 0 : int depth, mapped = 0, err = 0;
5767 0 : struct ext4_extent *extent;
5768 0 : ext4_lblk_t first_lblk, first_lclu, last_lclu;
5769 :
5770 : /*
5771 : * if data can be stored inline, the logical cluster isn't
5772 : * mapped - no physical clusters have been allocated, and the
5773 : * file has no extents
5774 : */
5775 0 : if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) ||
5776 : ext4_has_inline_data(inode))
5777 : return 0;
5778 :
5779 : /* search for the extent closest to the first block in the cluster */
5780 0 : path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0);
5781 0 : if (IS_ERR(path)) {
5782 0 : err = PTR_ERR(path);
5783 0 : path = NULL;
5784 0 : goto out;
5785 : }
5786 :
5787 0 : depth = ext_depth(inode);
5788 :
5789 : /*
5790 : * A consistent leaf must not be empty. This situation is possible,
5791 : * though, _during_ tree modification, and it's why an assert can't
5792 : * be put in ext4_find_extent().
5793 : */
5794 0 : if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
5795 0 : EXT4_ERROR_INODE(inode,
5796 : "bad extent address - lblock: %lu, depth: %d, pblock: %lld",
5797 : (unsigned long) EXT4_C2B(sbi, lclu),
5798 : depth, path[depth].p_block);
5799 0 : err = -EFSCORRUPTED;
5800 0 : goto out;
5801 : }
5802 :
5803 0 : extent = path[depth].p_ext;
5804 :
5805 : /* can't be mapped if the extent tree is empty */
5806 0 : if (extent == NULL)
5807 0 : goto out;
5808 :
5809 0 : first_lblk = le32_to_cpu(extent->ee_block);
5810 0 : first_lclu = EXT4_B2C(sbi, first_lblk);
5811 :
5812 : /*
5813 : * Three possible outcomes at this point - found extent spanning
5814 : * the target cluster, to the left of the target cluster, or to the
5815 : * right of the target cluster. The first two cases are handled here.
5816 : * The last case indicates the target cluster is not mapped.
5817 : */
5818 0 : if (lclu >= first_lclu) {
5819 0 : last_lclu = EXT4_B2C(sbi, first_lblk +
5820 : ext4_ext_get_actual_len(extent) - 1);
5821 0 : if (lclu <= last_lclu) {
5822 : mapped = 1;
5823 : } else {
5824 0 : first_lblk = ext4_ext_next_allocated_block(path);
5825 0 : first_lclu = EXT4_B2C(sbi, first_lblk);
5826 0 : if (lclu == first_lclu)
5827 0 : mapped = 1;
5828 : }
5829 : }
5830 :
5831 0 : out:
5832 0 : ext4_free_ext_path(path);
5833 :
5834 0 : return err ? err : mapped;
5835 : }
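/*
 * The cluster arithmetic used above reduces to shifts by s_cluster_bits
 * (log2 of the blocks-per-cluster ratio). Illustrative restatement with
 * cluster_bits passed explicitly so the helpers stand alone:
 */
static inline ext4_lblk_t example_block_to_cluster(ext4_lblk_t lblk,
						   unsigned int cluster_bits)
{
	return lblk >> cluster_bits;		/* cf. EXT4_B2C() */
}

static inline ext4_lblk_t example_cluster_to_block(ext4_lblk_t lclu,
						   unsigned int cluster_bits)
{
	return lclu << cluster_bits;		/* first block of the cluster, cf. EXT4_C2B() */
}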
5836 :
5837 : /*
5838 : * Updates the physical block address and unwritten status of the extent
5839 : * starting at logical block @start with length @len. If such an extent doesn't exist,
5840 : * this function splits the extent tree appropriately to create an
5841 : * extent like this. This function is called in the fast commit
5842 : * replay path. Returns 0 on success and error on failure.
5843 : */
5844 0 : int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start,
5845 : int len, int unwritten, ext4_fsblk_t pblk)
5846 : {
5847 0 : struct ext4_ext_path *path = NULL, *ppath;
5848 0 : struct ext4_extent *ex;
5849 0 : int ret;
5850 :
5851 0 : path = ext4_find_extent(inode, start, NULL, 0);
5852 0 : if (IS_ERR(path))
5853 0 : return PTR_ERR(path);
5854 0 : ex = path[path->p_depth].p_ext;
5855 0 : if (!ex) {
5856 0 : ret = -EFSCORRUPTED;
5857 0 : goto out;
5858 : }
5859 :
5860 0 : if (le32_to_cpu(ex->ee_block) != start ||
5861 : ext4_ext_get_actual_len(ex) != len) {
5862 : /* We need to split this extent to match our extent first */
5863 0 : ppath = path;
5864 0 : down_write(&EXT4_I(inode)->i_data_sem);
5865 0 : ret = ext4_force_split_extent_at(NULL, inode, &ppath, start, 1);
5866 0 : up_write(&EXT4_I(inode)->i_data_sem);
5867 0 : if (ret)
5868 0 : goto out;
5869 0 : kfree(path);
5870 0 : path = ext4_find_extent(inode, start, NULL, 0);
5871 0 : if (IS_ERR(path))
5872 : return -1;
5873 0 : ppath = path;
5874 0 : ex = path[path->p_depth].p_ext;
5875 0 : WARN_ON(le32_to_cpu(ex->ee_block) != start);
5876 0 : if (ext4_ext_get_actual_len(ex) != len) {
5877 0 : down_write(&EXT4_I(inode)->i_data_sem);
5878 0 : ret = ext4_force_split_extent_at(NULL, inode, &ppath,
5879 : start + len, 1);
5880 0 : up_write(&EXT4_I(inode)->i_data_sem);
5881 0 : if (ret)
5882 0 : goto out;
5883 0 : kfree(path);
5884 0 : path = ext4_find_extent(inode, start, NULL, 0);
5885 0 : if (IS_ERR(path))
5886 : return -EINVAL;
5887 0 : ex = path[path->p_depth].p_ext;
5888 : }
5889 : }
5890 0 : if (unwritten)
5891 0 : ext4_ext_mark_unwritten(ex);
5892 : else
5893 0 : ext4_ext_mark_initialized(ex);
5894 0 : ext4_ext_store_pblock(ex, pblk);
5895 0 : down_write(&EXT4_I(inode)->i_data_sem);
5896 0 : ret = ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
5897 0 : up_write(&EXT4_I(inode)->i_data_sem);
5898 0 : out:
5899 0 : ext4_free_ext_path(path);
5900 0 : ext4_mark_inode_dirty(NULL, inode);
5901 0 : return ret;
5902 : }
5903 :
5904 : /* Try to shrink the extent tree */
5905 0 : void ext4_ext_replay_shrink_inode(struct inode *inode, ext4_lblk_t end)
5906 : {
5907 0 : struct ext4_ext_path *path = NULL;
5908 0 : struct ext4_extent *ex;
5909 0 : ext4_lblk_t old_cur, cur = 0;
5910 :
5911 0 : while (cur < end) {
5912 0 : path = ext4_find_extent(inode, cur, NULL, 0);
5913 0 : if (IS_ERR(path))
5914 : return;
5915 0 : ex = path[path->p_depth].p_ext;
5916 0 : if (!ex) {
5917 0 : ext4_free_ext_path(path);
5918 0 : ext4_mark_inode_dirty(NULL, inode);
5919 0 : return;
5920 : }
5921 0 : old_cur = cur;
5922 0 : cur = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
5923 0 : if (cur <= old_cur)
5924 0 : cur = old_cur + 1;
5925 0 : ext4_ext_try_to_merge(NULL, inode, path, ex);
5926 0 : down_write(&EXT4_I(inode)->i_data_sem);
5927 0 : ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
5928 0 : up_write(&EXT4_I(inode)->i_data_sem);
5929 0 : ext4_mark_inode_dirty(NULL, inode);
5930 0 : ext4_free_ext_path(path);
5931 : }
5932 : }
5933 :
5934 : /* Check if *cur is a hole and if it is, skip it */
5935 0 : static int skip_hole(struct inode *inode, ext4_lblk_t *cur)
5936 : {
5937 0 : int ret;
5938 0 : struct ext4_map_blocks map;
5939 :
5940 0 : map.m_lblk = *cur;
5941 0 : map.m_len = ((inode->i_size) >> inode->i_sb->s_blocksize_bits) - *cur;
5942 :
5943 0 : ret = ext4_map_blocks(NULL, inode, &map, 0);
5944 0 : if (ret < 0)
5945 : return ret;
5946 0 : if (ret != 0)
5947 : return 0;
5948 0 : *cur = *cur + map.m_len;
5949 0 : return 0;
5950 : }
5951 :
5952 : /* Count number of blocks used by this inode and update i_blocks */
5953 0 : int ext4_ext_replay_set_iblocks(struct inode *inode)
5954 : {
5955 0 : struct ext4_ext_path *path = NULL, *path2 = NULL;
5956 0 : struct ext4_extent *ex;
5957 0 : ext4_lblk_t cur = 0, end;
5958 0 : int numblks = 0, i, ret = 0;
5959 0 : ext4_fsblk_t cmp1, cmp2;
5960 0 : struct ext4_map_blocks map;
5961 :
5962 : /* Determine the size of the file first */
5963 0 : path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
5964 : EXT4_EX_NOCACHE);
5965 0 : if (IS_ERR(path))
5966 0 : return PTR_ERR(path);
5967 0 : ex = path[path->p_depth].p_ext;
5968 0 : if (!ex) {
5969 0 : ext4_free_ext_path(path);
5970 0 : goto out;
5971 : }
5972 0 : end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
5973 0 : ext4_free_ext_path(path);
5974 :
5975 : /* Count the number of data blocks */
5976 0 : cur = 0;
5977 0 : while (cur < end) {
5978 0 : map.m_lblk = cur;
5979 0 : map.m_len = end - cur;
5980 0 : ret = ext4_map_blocks(NULL, inode, &map, 0);
5981 0 : if (ret < 0)
5982 : break;
5983 0 : if (ret > 0)
5984 0 : numblks += ret;
5985 0 : cur = cur + map.m_len;
5986 : }
5987 :
5988 : /*
5989 : * Count the number of extent tree blocks. We do it by looking up
5990 : * two successive extents and determining the difference between
5991 : * their paths. When the paths differ for two successive extents,
5992 : * we compare the blocks in the path at each level and increment
5993 : * iblocks by the total number of differences found.
5994 : */
5995 0 : cur = 0;
5996 0 : ret = skip_hole(inode, &cur);
5997 0 : if (ret < 0)
5998 0 : goto out;
5999 0 : path = ext4_find_extent(inode, cur, NULL, 0);
6000 0 : if (IS_ERR(path))
6001 0 : goto out;
6002 0 : numblks += path->p_depth;
6003 0 : ext4_free_ext_path(path);
6004 0 : while (cur < end) {
6005 0 : path = ext4_find_extent(inode, cur, NULL, 0);
6006 0 : if (IS_ERR(path))
6007 : break;
6008 0 : ex = path[path->p_depth].p_ext;
6009 0 : if (!ex) {
6010 0 : ext4_free_ext_path(path);
6011 0 : return 0;
6012 : }
6013 0 : cur = max(cur + 1, le32_to_cpu(ex->ee_block) +
6014 : ext4_ext_get_actual_len(ex));
6015 0 : ret = skip_hole(inode, &cur);
6016 0 : if (ret < 0) {
6017 0 : ext4_free_ext_path(path);
6018 : break;
6019 : }
6020 0 : path2 = ext4_find_extent(inode, cur, NULL, 0);
6021 0 : if (IS_ERR(path2)) {
6022 0 : ext4_free_ext_path(path);
6023 : break;
6024 : }
6025 0 : for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) {
6026 0 : cmp1 = cmp2 = 0;
6027 0 : if (i <= path->p_depth)
6028 0 : cmp1 = path[i].p_bh ?
6029 0 : path[i].p_bh->b_blocknr : 0;
6030 0 : if (i <= path2->p_depth)
6031 0 : cmp2 = path2[i].p_bh ?
6032 0 : path2[i].p_bh->b_blocknr : 0;
6033 0 : if (cmp1 != cmp2 && cmp2 != 0)
6034 0 : numblks++;
6035 : }
6036 0 : ext4_free_ext_path(path);
6037 0 : ext4_free_ext_path(path2);
6038 : }
6039 :
6040 0 : out:
6041 0 : inode->i_blocks = numblks << (inode->i_sb->s_blocksize_bits - 9);
6042 0 : ext4_mark_inode_dirty(NULL, inode);
6043 0 : return 0;
6044 : }
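/*
 * i_blocks is accounted in 512-byte units, which is why the final store
 * above shifts by (s_blocksize_bits - 9). Illustrative restatement: with
 * 4KiB filesystem blocks, each block contributes 8 such units.
 */
static inline __u64 example_fs_blocks_to_i_blocks(__u64 numblks,
						  unsigned int blocksize_bits)
{
	return numblks << (blocksize_bits - 9);
}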
6045 :
6046 0 : int ext4_ext_clear_bb(struct inode *inode)
6047 : {
6048 0 : struct ext4_ext_path *path = NULL;
6049 0 : struct ext4_extent *ex;
6050 0 : ext4_lblk_t cur = 0, end;
6051 0 : int j, ret = 0;
6052 0 : struct ext4_map_blocks map;
6053 :
6054 0 : if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
6055 : return 0;
6056 :
6057 : /* Determine the size of the file first */
6058 0 : path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
6059 : EXT4_EX_NOCACHE);
6060 0 : if (IS_ERR(path))
6061 0 : return PTR_ERR(path);
6062 0 : ex = path[path->p_depth].p_ext;
6063 0 : if (!ex) {
6064 0 : ext4_free_ext_path(path);
6065 0 : return 0;
6066 : }
6067 0 : end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
6068 0 : ext4_free_ext_path(path);
6069 :
6070 0 : cur = 0;
6071 0 : while (cur < end) {
6072 0 : map.m_lblk = cur;
6073 0 : map.m_len = end - cur;
6074 0 : ret = ext4_map_blocks(NULL, inode, &map, 0);
6075 0 : if (ret < 0)
6076 : break;
6077 0 : if (ret > 0) {
6078 0 : path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
6079 0 : if (!IS_ERR_OR_NULL(path)) {
6080 0 : for (j = 0; j < path->p_depth; j++) {
6081 :
6082 0 : ext4_mb_mark_bb(inode->i_sb,
6083 0 : path[j].p_block, 1, 0);
6084 0 : ext4_fc_record_regions(inode->i_sb, inode->i_ino,
6085 : 0, path[j].p_block, 1, 1);
6086 : }
6087 0 : ext4_free_ext_path(path);
6088 : }
6089 0 : ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
6090 0 : ext4_fc_record_regions(inode->i_sb, inode->i_ino,
6091 0 : map.m_lblk, map.m_pblk, map.m_len, 1);
6092 : }
6093 0 : cur = cur + map.m_len;
6094 : }
6095 :
6096 : return 0;
6097 : }
|