LCOV - code coverage report
Current view: top level - fs/ext4 - extents.c (source / functions)
Test: fstests of 6.5.0-rc3-achx @ Mon Jul 31 20:08:12 PDT 2023
Date: 2023-07-31 20:08:12
             Hit    Total   Coverage
Lines:      2173     2873     75.6 %
Functions:    76       87     87.4 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
       4             :  * Written by Alex Tomas <alex@clusterfs.com>
       5             :  *
       6             :  * Architecture independence:
       7             :  *   Copyright (c) 2005, Bull S.A.
       8             :  *   Written by Pierre Peiffer <pierre.peiffer@bull.net>
       9             :  */
      10             : 
      11             : /*
      12             :  * Extents support for EXT4
      13             :  *
      14             :  * TODO:
      15             :  *   - ext4*_error() should be used in some situations
      16             :  *   - analyze all BUG()/BUG_ON(), use -EIO where appropriate
      17             :  *   - smart tree reduction
      18             :  */
      19             : 
      20             : #include <linux/fs.h>
      21             : #include <linux/time.h>
      22             : #include <linux/jbd2.h>
      23             : #include <linux/highuid.h>
      24             : #include <linux/pagemap.h>
      25             : #include <linux/quotaops.h>
      26             : #include <linux/string.h>
      27             : #include <linux/slab.h>
      28             : #include <linux/uaccess.h>
      29             : #include <linux/fiemap.h>
      30             : #include <linux/iomap.h>
      31             : #include <linux/sched/mm.h>
      32             : #include "ext4_jbd2.h"
      33             : #include "ext4_extents.h"
      34             : #include "xattr.h"
      35             : 
      36             : #include <trace/events/ext4.h>
      37             : 
      38             : /*
      39             :  * used by extent splitting.
      40             :  */
      41             : #define EXT4_EXT_MAY_ZEROOUT    0x1  /* safe to zeroout if split fails \
      42             :                                         due to ENOSPC */
      43             : #define EXT4_EXT_MARK_UNWRIT1   0x2  /* mark first half unwritten */
      44             : #define EXT4_EXT_MARK_UNWRIT2   0x4  /* mark second half unwritten */
      45             : 
      46             : #define EXT4_EXT_DATA_VALID1    0x8  /* first half contains valid data */
      47             : #define EXT4_EXT_DATA_VALID2    0x10 /* second half contains valid data */
      48             : 
      49    14361050 : static __le32 ext4_extent_block_csum(struct inode *inode,
      50             :                                      struct ext4_extent_header *eh)
      51             : {
      52    14361050 :         struct ext4_inode_info *ei = EXT4_I(inode);
      53    14361050 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
      54    14361050 :         __u32 csum;
      55             : 
      56    14361050 :         csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)eh,
      57    14361050 :                            EXT4_EXTENT_TAIL_OFFSET(eh));
      58    14361365 :         return cpu_to_le32(csum);
      59             : }
      60             : 
      61        9111 : static int ext4_extent_block_csum_verify(struct inode *inode,
      62             :                                          struct ext4_extent_header *eh)
      63             : {
      64        9111 :         struct ext4_extent_tail *et;
      65             : 
      66        9111 :         if (!ext4_has_metadata_csum(inode->i_sb))
      67             :                 return 1;
      68             : 
      69        9111 :         et = find_ext4_extent_tail(eh);
      70        9111 :         if (et->et_checksum != ext4_extent_block_csum(inode, eh))
      71           0 :                 return 0;
      72             :         return 1;
      73             : }
      74             : 
      75    14352593 : static void ext4_extent_block_csum_set(struct inode *inode,
      76             :                                        struct ext4_extent_header *eh)
      77             : {
      78    14352593 :         struct ext4_extent_tail *et;
      79             : 
      80    14352593 :         if (!ext4_has_metadata_csum(inode->i_sb))
      81             :                 return;
      82             : 
      83    14351880 :         et = find_ext4_extent_tail(eh);
      84    14351880 :         et->et_checksum = ext4_extent_block_csum(inode, eh);
      85             : }
      86             : 
      87             : static int ext4_split_extent_at(handle_t *handle,
      88             :                              struct inode *inode,
      89             :                              struct ext4_ext_path **ppath,
      90             :                              ext4_lblk_t split,
      91             :                              int split_flag,
      92             :                              int flags);
      93             : 
      94          70 : static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
      95             : {
      96             :         /*
      97             :          * Drop i_data_sem to avoid deadlock with ext4_map_blocks.  At this
      98             :          * moment, get_block can be called only for blocks inside i_size since
      99             :          * page cache has already been dropped and writes are blocked by
     100             :          * i_rwsem. So we can safely drop the i_data_sem here.
     101             :          */
     102          70 :         BUG_ON(EXT4_JOURNAL(inode) == NULL);
     103          70 :         ext4_discard_preallocations(inode, 0);
     104          70 :         up_write(&EXT4_I(inode)->i_data_sem);
     105          70 :         *dropped = 1;
     106          70 :         return 0;
     107             : }
     108             : 
     109    27983895 : static void ext4_ext_drop_refs(struct ext4_ext_path *path)
     110             : {
     111    27983895 :         int depth, i;
     112             : 
     113    27983895 :         if (!path)
     114             :                 return;
     115    21902665 :         depth = path->p_depth;
     116    67804977 :         for (i = 0; i <= depth; i++, path++) {
     117    45911890 :                 brelse(path->p_bh);
     118    45902312 :                 path->p_bh = NULL;
     119             :         }
     120             : }
     121             : 
     122     2292185 : void ext4_free_ext_path(struct ext4_ext_path *path)
     123             : {
     124     2292185 :         ext4_ext_drop_refs(path);
     125    19296013 :         kfree(path);
     126        1001 : }
     127             : 
     128             : /*
     129             :  * Make sure 'handle' has at least 'check_cred' credits. If not, restart
     130             :  * transaction with 'restart_cred' credits. The function drops i_data_sem
     131             :  * when restarting transaction and gets it after transaction is restarted.
     132             :  *
     133             :  * The function returns 0 on success, 1 if transaction had to be restarted,
     134             :  * and < 0 in case of fatal error.
     135             :  */
     136     2655788 : int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode,
     137             :                                 int check_cred, int restart_cred,
     138             :                                 int revoke_cred)
     139             : {
     140     2655788 :         int ret;
     141     2655788 :         int dropped = 0;
     142             : 
     143     2655858 :         ret = ext4_journal_ensure_credits_fn(handle, check_cred, restart_cred,
     144             :                 revoke_cred, ext4_ext_trunc_restart_fn(inode, &dropped));
     145     2655760 :         if (dropped)
     146          70 :                 down_write(&EXT4_I(inode)->i_data_sem);
     147     2655760 :         return ret;
     148             : }
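
A minimal, self-contained sketch of the 0 / 1 / < 0 return convention described
in the comment above ext4_datasem_ensure_credits(); ensure_credits() below is a
hypothetical stand-in, not the real kernel function:

    #include <stdio.h>

    /* Stand-in mirroring the convention only: 0 = enough credits,
     * 1 = transaction had to be restarted, negative = fatal error. */
    static int ensure_credits(int have, int need)
    {
            if (need < 0)
                    return -1;
            if (have >= need)
                    return 0;
            return 1;
    }

    int main(void)
    {
            int ret = ensure_credits(2, 8);

            if (ret < 0)
                    return 1;       /* fatal error */
            if (ret > 0) {
                    /* In the real code i_data_sem was dropped and re-taken,
                     * so cached extent state must be re-validated here. */
                    printf("transaction restarted\n");
            }
            return 0;
    }
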
     149             : 
     150             : /*
     151             :  * could return:
     152             :  *  - EROFS
     153             :  *  - ENOMEM
     154             :  */
     155    16612231 : static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
     156             :                                 struct ext4_ext_path *path)
     157             : {
     158    16612231 :         int err = 0;
     159             : 
     160    16612231 :         if (path->p_bh) {
     161             :                 /* path points to block */
     162    14324335 :                 BUFFER_TRACE(path->p_bh, "get_write_access");
     163    14324335 :                 err = ext4_journal_get_write_access(handle, inode->i_sb,
     164             :                                                     path->p_bh, EXT4_JTR_NONE);
     165             :                 /*
     166             :                  * The extent buffer's verified bit will be set again in
     167             :                  * __ext4_ext_dirty(). We could leave an inconsistent
     168             :                  * buffer if the extent updating procedure is interrupted
     169             :                  * by an error, so force it to be checked again.
     170             :                  */
     171    14324634 :                 if (!err)
     172    14324629 :                         clear_buffer_verified(path->p_bh);
     173             :         }
     174             :         /* path points to leaf/index in inode body */
     175             :         /* we use in-core data, no need to protect them */
     176    16612676 :         return err;
     177             : }
     178             : 
     179             : /*
     180             :  * could return:
     181             :  *  - EROFS
     182             :  *  - ENOMEM
     183             :  *  - EIO
     184             :  */
     185    16612068 : static int __ext4_ext_dirty(const char *where, unsigned int line,
     186             :                             handle_t *handle, struct inode *inode,
     187             :                             struct ext4_ext_path *path)
     188             : {
     189    16612068 :         int err;
     190             : 
     191    16612068 :         WARN_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
     192    16612068 :         if (path->p_bh) {
     193    14284293 :                 ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh));
     194             :                 /* path points to block */
     195    14284396 :                 err = __ext4_handle_dirty_metadata(where, line, handle,
     196             :                                                    inode, path->p_bh);
     197             :                 /* Extents updating done, re-set verified flag */
     198    14284486 :                 if (!err)
     199    14284397 :                         set_buffer_verified(path->p_bh);
     200             :         } else {
     201             :                 /* path points to leaf/index in inode body */
     202     2327775 :                 err = ext4_mark_inode_dirty(handle, inode);
     203             :         }
     204    16613729 :         return err;
     205             : }
     206             : 
     207             : #define ext4_ext_dirty(handle, inode, path) \
     208             :                 __ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
     209             : 
     210     3820209 : static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
     211             :                               struct ext4_ext_path *path,
     212             :                               ext4_lblk_t block)
     213             : {
     214     3820209 :         if (path) {
     215     3820209 :                 int depth = path->p_depth;
     216     3820209 :                 struct ext4_extent *ex;
     217             : 
     218             :                 /*
     219             :                  * Try to predict block placement assuming that we are
     220             :                  * filling in a file which will eventually be
     221             :                  * non-sparse --- i.e., in the case of libbfd writing
     222             :                  * an ELF object's sections out-of-order but in a way
     223             :                  * that eventually results in a contiguous object or
     224             :                  * executable file, or some database extending a table
     225             :                  * space file.  However, this is actually somewhat
     226             :                  * non-ideal if we are writing a sparse file such as
     227             :                  * qemu or KVM writing a raw image file that is going
     228             :                  * to stay fairly sparse, since it will end up
     229             :                  * fragmenting the file system's free space.  Maybe we
     230             :                  * should have some heuristics or some way to allow
     231             :                  * userspace to pass a hint to the file system,
     232             :                  * especially if the latter case turns out to be
     233             :                  * common.
     234             :                  */
     235     3820209 :                 ex = path[depth].p_ext;
     236     3820209 :                 if (ex) {
     237     3039429 :                         ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
     238     3039429 :                         ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);
     239             : 
     240     3039429 :                         if (block > ext_block)
     241     2983815 :                                 return ext_pblk + (block - ext_block);
     242             :                         else
     243       55614 :                                 return ext_pblk - (ext_block - block);
     244             :                 }
     245             : 
     246             :                 /* it looks like index is empty;
     247             :                  * try to find starting block from index itself */
     248      780780 :                 if (path[depth].p_bh)
     249           0 :                         return path[depth].p_bh->b_blocknr;
     250             :         }
     251             : 
     252             :         /* OK. use inode's group */
     253      780780 :         return ext4_inode_to_goal_block(inode);
     254             : }
     255             : 
     256             : /*
     257             :  * Allocation for a meta data block
     258             :  */
     259             : static ext4_fsblk_t
     260        6472 : ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
     261             :                         struct ext4_ext_path *path,
     262             :                         struct ext4_extent *ex, int *err, unsigned int flags)
     263             : {
     264        6472 :         ext4_fsblk_t goal, newblock;
     265             : 
     266        6472 :         goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
     267        6472 :         newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
     268             :                                         NULL, err);
     269        6472 :         return newblock;
     270             : }
     271             : 
     272             : static inline int ext4_ext_space_block(struct inode *inode, int check)
     273             : {
     274       77373 :         int size;
     275             : 
     276       77373 :         size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
     277       77373 :                         / sizeof(struct ext4_extent);
     278             : #ifdef AGGRESSIVE_TEST
     279             :         if (!check && size > 6)
     280             :                 size = 6;
     281             : #endif
     282       77373 :         return size;
     283             : }
     284             : 
     285             : static inline int ext4_ext_space_block_idx(struct inode *inode, int check)
     286             : {
     287          80 :         int size;
     288             : 
     289          80 :         size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
     290          80 :                         / sizeof(struct ext4_extent_idx);
     291             : #ifdef AGGRESSIVE_TEST
     292             :         if (!check && size > 5)
     293             :                 size = 5;
     294             : #endif
     295          80 :         return size;
     296             : }
     297             : 
     298             : static inline int ext4_ext_space_root(struct inode *inode, int check)
     299             : {
     300             :         int size;
     301             : 
     302             :         size = sizeof(EXT4_I(inode)->i_data);
     303             :         size -= sizeof(struct ext4_extent_header);
     304             :         size /= sizeof(struct ext4_extent);
     305             : #ifdef AGGRESSIVE_TEST
     306             :         if (!check && size > 3)
     307             :                 size = 3;
     308             : #endif
     309             :         return size;
     310             : }
     311             : 
     312             : static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
     313             : {
     314             :         int size;
     315             : 
     316             :         size = sizeof(EXT4_I(inode)->i_data);
     317             :         size -= sizeof(struct ext4_extent_header);
     318             :         size /= sizeof(struct ext4_extent_idx);
     319             : #ifdef AGGRESSIVE_TEST
     320             :         if (!check && size > 4)
     321             :                 size = 4;
     322             : #endif
     323             :         return size;
     324             : }
     325             : 
     326             : static inline int
     327     2491898 : ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
     328             :                            struct ext4_ext_path **ppath, ext4_lblk_t lblk,
     329             :                            int nofail)
     330             : {
     331     2491898 :         struct ext4_ext_path *path = *ppath;
     332     2491898 :         int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);
     333     2491898 :         int flags = EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO;
     334             : 
     335     2491898 :         if (nofail)
     336      165404 :                 flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL | EXT4_EX_NOFAIL;
     337             : 
     338     2647527 :         return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ?
     339             :                         EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0,
     340             :                         flags);
     341             : }
     342             : 
     343             : static int
     344      806140 : ext4_ext_max_entries(struct inode *inode, int depth)
     345             : {
     346      806140 :         int max;
     347             : 
     348      806140 :         if (depth == ext_depth(inode)) {
     349             :                 if (depth == 0)
     350             :                         max = ext4_ext_space_root(inode, 1);
     351             :                 else
     352             :                         max = ext4_ext_space_root_idx(inode, 1);
     353             :         } else {
     354        9113 :                 if (depth == 0)
     355        9093 :                         max = ext4_ext_space_block(inode, 1);
     356             :                 else
     357          20 :                         max = ext4_ext_space_block_idx(inode, 1);
     358             :         }
     359             : 
     360      806140 :         return max;
     361             : }
     362             : 
     363     1556417 : static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
     364             : {
     365     1556417 :         ext4_fsblk_t block = ext4_ext_pblock(ext);
     366     1556417 :         int len = ext4_ext_get_actual_len(ext);
     367     1556417 :         ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
     368             : 
     369             :         /*
     370             :          * We allow neither:
     371             :          *  - zero length
     372             :          *  - overflow/wrap-around
     373             :          */
     374     1556417 :         if (lblock + len <= lblock)
     375             :                 return 0;
     376     1556406 :         return ext4_inode_block_valid(inode, block, len);
     377             : }
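
The "lblock + len <= lblock" test above rejects both a zero-length extent and a
logical range that wraps past the 32-bit block-number space, since ext4_lblk_t
is a 32-bit type. A small standalone illustration of that arithmetic (uint32_t
stands in for ext4_lblk_t):

    #include <stdint.h>
    #include <stdio.h>

    static int range_ok(uint32_t lblock, uint32_t len)
    {
            /* false for len == 0 and for lblock + len wrapping around */
            return lblock + len > lblock;
    }

    int main(void)
    {
            printf("%d\n", range_ok(100, 0));            /* 0: zero length */
            printf("%d\n", range_ok(0xFFFFFFF0u, 0x20)); /* 0: wraps around */
            printf("%d\n", range_ok(100, 8));            /* 1: valid range */
            return 0;
    }
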
     378             : 
     379      169618 : static int ext4_valid_extent_idx(struct inode *inode,
     380             :                                 struct ext4_extent_idx *ext_idx)
     381             : {
     382      169618 :         ext4_fsblk_t block = ext4_idx_pblock(ext_idx);
     383             : 
     384      169618 :         return ext4_inode_block_valid(inode, block, 1);
     385             : }
     386             : 
     387      806164 : static int ext4_valid_extent_entries(struct inode *inode,
     388             :                                      struct ext4_extent_header *eh,
     389             :                                      ext4_lblk_t lblk, ext4_fsblk_t *pblk,
     390             :                                      int depth)
     391             : {
     392      806164 :         unsigned short entries;
     393      806164 :         ext4_lblk_t lblock = 0;
     394      806164 :         ext4_lblk_t cur = 0;
     395             : 
     396      806164 :         if (eh->eh_entries == 0)
     397             :                 return 1;
     398             : 
     399      567503 :         entries = le16_to_cpu(eh->eh_entries);
     400             : 
     401      567503 :         if (depth == 0) {
     402             :                 /* leaf entries */
     403      399289 :                 struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
     404             : 
     405             :                 /*
     406             :                  * The logical block in the first entry should be equal to
     407             :                  * the number in the index block.
     408             :                  */
     409      399289 :                 if (depth != ext_depth(inode) &&
     410        8502 :                     lblk != le32_to_cpu(ext->ee_block))
     411             :                         return 0;
     412     1955754 :                 while (entries) {
     413     1556411 :                         if (!ext4_valid_extent(inode, ext))
     414             :                                 return 0;
     415             : 
     416             :                         /* Check for overlapping extents */
     417     1556467 :                         lblock = le32_to_cpu(ext->ee_block);
     418     1556467 :                         if (lblock < cur) {
     419           0 :                                 *pblk = ext4_ext_pblock(ext);
     420           0 :                                 return 0;
     421             :                         }
     422     1556467 :                         cur = lblock + ext4_ext_get_actual_len(ext);
     423     1556467 :                         ext++;
     424     1556467 :                         entries--;
     425             :                 }
     426             :         } else {
     427      168214 :                 struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
     428             : 
     429             :                 /*
     430             :                  * The logical block in the first entry should be equal to
     431             :                  * the number in the parent index block.
     432             :                  */
     433      168214 :                 if (depth != ext_depth(inode) &&
     434          20 :                     lblk != le32_to_cpu(ext_idx->ei_block))
     435             :                         return 0;
     436      337832 :                 while (entries) {
     437      169619 :                         if (!ext4_valid_extent_idx(inode, ext_idx))
     438             :                                 return 0;
     439             : 
     440             :                         /* Check for overlapping index extents */
     441      169618 :                         lblock = le32_to_cpu(ext_idx->ei_block);
     442      169618 :                         if (lblock < cur) {
     443           0 :                                 *pblk = ext4_idx_pblock(ext_idx);
     444           0 :                                 return 0;
     445             :                         }
     446      169618 :                         ext_idx++;
     447      169618 :                         entries--;
     448      169618 :                         cur = lblock + 1;
     449             :                 }
     450             :         }
     451             :         return 1;
     452             : }
     453             : 
     454      806063 : static int __ext4_ext_check(const char *function, unsigned int line,
     455             :                             struct inode *inode, struct ext4_extent_header *eh,
     456             :                             int depth, ext4_fsblk_t pblk, ext4_lblk_t lblk)
     457             : {
     458      806063 :         const char *error_msg;
     459      806063 :         int max = 0, err = -EFSCORRUPTED;
     460             : 
     461      806063 :         if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
     462           0 :                 error_msg = "invalid magic";
     463           0 :                 goto corrupted;
     464             :         }
     465      806063 :         if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) {
     466           0 :                 error_msg = "unexpected eh_depth";
     467           0 :                 goto corrupted;
     468             :         }
     469      806063 :         if (unlikely(eh->eh_max == 0)) {
     470           0 :                 error_msg = "invalid eh_max";
     471           0 :                 goto corrupted;
     472             :         }
     473      806063 :         max = ext4_ext_max_entries(inode, depth);
     474      806063 :         if (unlikely(le16_to_cpu(eh->eh_max) > max)) {
     475           0 :                 error_msg = "too large eh_max";
     476           0 :                 goto corrupted;
     477             :         }
     478      806063 :         if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) {
     479           0 :                 error_msg = "invalid eh_entries";
     480           0 :                 goto corrupted;
     481             :         }
     482      806063 :         if (unlikely((eh->eh_entries == 0) && (depth > 0))) {
     483           0 :                 error_msg = "eh_entries is 0 but eh_depth is > 0";
     484           0 :                 goto corrupted;
     485             :         }
     486      806063 :         if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) {
     487           2 :                 error_msg = "invalid extent entries";
     488           2 :                 goto corrupted;
     489             :         }
     490      806199 :         if (unlikely(depth > 32)) {
     491           0 :                 error_msg = "too large eh_depth";
     492           0 :                 goto corrupted;
     493             :         }
     494             :         /* Verify checksum on non-root extent tree nodes */
     495      815310 :         if (ext_depth(inode) != depth &&
     496        9111 :             !ext4_extent_block_csum_verify(inode, eh)) {
     497           0 :                 error_msg = "extent tree corrupted";
     498           0 :                 err = -EFSBADCRC;
     499           0 :                 goto corrupted;
     500             :         }
     501             :         return 0;
     502             : 
     503           2 : corrupted:
     504           2 :         ext4_error_inode_err(inode, function, line, 0, -err,
     505             :                              "pblk %llu bad header/extent: %s - magic %x, "
     506             :                              "entries %u, max %u(%u), depth %u(%u)",
     507             :                              (unsigned long long) pblk, error_msg,
     508             :                              le16_to_cpu(eh->eh_magic),
     509             :                              le16_to_cpu(eh->eh_entries),
     510             :                              le16_to_cpu(eh->eh_max),
     511             :                              max, le16_to_cpu(eh->eh_depth), depth);
     512           2 :         return err;
     513             : }
     514             : 
     515             : #define ext4_ext_check(inode, eh, depth, pblk)                  \
     516             :         __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk), 0)
     517             : 
     518      202109 : int ext4_ext_check_inode(struct inode *inode)
     519             : {
     520      202109 :         return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0);
     521             : }
     522             : 
     523     3650354 : static void ext4_cache_extents(struct inode *inode,
     524             :                                struct ext4_extent_header *eh)
     525             : {
     526     3650354 :         struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
     527     3650354 :         ext4_lblk_t prev = 0;
     528     3650354 :         int i;
     529             : 
     530     6834045 :         for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
     531     3180888 :                 unsigned int status = EXTENT_STATUS_WRITTEN;
     532     3180888 :                 ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
     533     3180888 :                 int len = ext4_ext_get_actual_len(ex);
     534             : 
     535     3180888 :                 if (prev && (prev != lblk))
     536     1361500 :                         ext4_es_cache_extent(inode, prev, lblk - prev, ~0,
     537             :                                              EXTENT_STATUS_HOLE);
     538             : 
     539     3180887 :                 if (ext4_ext_is_unwritten(ex))
     540     1326881 :                         status = EXTENT_STATUS_UNWRITTEN;
     541     3180887 :                 ext4_es_cache_extent(inode, lblk, len,
     542             :                                      ext4_ext_pblock(ex), status);
     543     3183691 :                 prev = lblk + len;
     544             :         }
     545     3653157 : }
     546             : 
     547             : static struct buffer_head *
     548    24123003 : __read_extent_tree_block(const char *function, unsigned int line,
     549             :                          struct inode *inode, struct ext4_extent_idx *idx,
     550             :                          int depth, int flags)
     551             : {
     552    24123003 :         struct buffer_head              *bh;
     553    24123003 :         int                             err;
     554    24123003 :         gfp_t                           gfp_flags = __GFP_MOVABLE | GFP_NOFS;
     555    24123003 :         ext4_fsblk_t                    pblk;
     556             : 
     557    24123003 :         if (flags & EXT4_EX_NOFAIL)
     558      417878 :                 gfp_flags |= __GFP_NOFAIL;
     559             : 
     560    24123003 :         pblk = ext4_idx_pblock(idx);
     561    24123003 :         bh = sb_getblk_gfp(inode->i_sb, pblk, gfp_flags);
     562    24127427 :         if (unlikely(!bh))
     563             :                 return ERR_PTR(-ENOMEM);
     564             : 
     565    24127427 :         if (!bh_uptodate_or_lock(bh)) {
     566        2799 :                 trace_ext4_ext_load_extent(inode, pblk, _RET_IP_);
     567        2799 :                 err = ext4_read_bh(bh, 0, NULL);
     568        2799 :                 if (err < 0)
     569           0 :                         goto errout;
     570             :         }
     571    48255630 :         if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE))
     572             :                 return bh;
     573        9098 :         err = __ext4_ext_check(function, line, inode, ext_block_hdr(bh),
     574        9098 :                                depth, pblk, le32_to_cpu(idx->ei_block));
     575        9113 :         if (err)
     576           2 :                 goto errout;
     577        9111 :         set_buffer_verified(bh);
     578             :         /*
     579             :          * If this is a leaf block, cache all of its entries
     580             :          */
     581        9111 :         if (!(flags & EXT4_EX_NOCACHE) && depth == 0) {
     582        6372 :                 struct ext4_extent_header *eh = ext_block_hdr(bh);
     583        6372 :                 ext4_cache_extents(inode, eh);
     584             :         }
     585             :         return bh;
     586           2 : errout:
     587           2 :         put_bh(bh);
     588           2 :         return ERR_PTR(err);
     589             : 
     590             : }
     591             : 
     592             : #define read_extent_tree_block(inode, idx, depth, flags)                \
     593             :         __read_extent_tree_block(__func__, __LINE__, (inode), (idx),    \
     594             :                                  (depth), (flags))
     595             : 
     596             : /*
     597             :  * This function is called to cache a file's extent information in the
     598             :  * extent status tree
     599             :  */
     600           0 : int ext4_ext_precache(struct inode *inode)
     601             : {
     602           0 :         struct ext4_inode_info *ei = EXT4_I(inode);
     603           0 :         struct ext4_ext_path *path = NULL;
     604           0 :         struct buffer_head *bh;
     605           0 :         int i = 0, depth, ret = 0;
     606             : 
     607           0 :         if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
     608             :                 return 0;       /* not an extent-mapped inode */
     609             : 
     610           0 :         down_read(&ei->i_data_sem);
     611           0 :         depth = ext_depth(inode);
     612             : 
     613             :         /* Don't cache anything if there are no external extent blocks */
     614           0 :         if (!depth) {
     615           0 :                 up_read(&ei->i_data_sem);
     616           0 :                 return ret;
     617             :         }
     618             : 
     619           0 :         path = kcalloc(depth + 1, sizeof(struct ext4_ext_path),
     620             :                        GFP_NOFS);
     621           0 :         if (path == NULL) {
     622           0 :                 up_read(&ei->i_data_sem);
     623           0 :                 return -ENOMEM;
     624             :         }
     625             : 
     626           0 :         path[0].p_hdr = ext_inode_hdr(inode);
     627           0 :         ret = ext4_ext_check(inode, path[0].p_hdr, depth, 0);
     628           0 :         if (ret)
     629           0 :                 goto out;
     630           0 :         path[0].p_idx = EXT_FIRST_INDEX(path[0].p_hdr);
     631           0 :         while (i >= 0) {
     632             :                 /*
     633             :                  * If this is a leaf block or we've reached the end of
     634             :                  * the index block, go up
     635             :                  */
     636           0 :                 if ((i == depth) ||
     637           0 :                     path[i].p_idx > EXT_LAST_INDEX(path[i].p_hdr)) {
     638           0 :                         brelse(path[i].p_bh);
     639           0 :                         path[i].p_bh = NULL;
     640           0 :                         i--;
     641           0 :                         continue;
     642             :                 }
     643           0 :                 bh = read_extent_tree_block(inode, path[i].p_idx++,
     644             :                                             depth - i - 1,
     645             :                                             EXT4_EX_FORCE_CACHE);
     646           0 :                 if (IS_ERR(bh)) {
     647           0 :                         ret = PTR_ERR(bh);
     648           0 :                         break;
     649             :                 }
     650           0 :                 i++;
     651           0 :                 path[i].p_bh = bh;
     652           0 :                 path[i].p_hdr = ext_block_hdr(bh);
     653           0 :                 path[i].p_idx = EXT_FIRST_INDEX(path[i].p_hdr);
     654             :         }
     655           0 :         ext4_set_inode_state(inode, EXT4_STATE_EXT_PRECACHED);
     656           0 : out:
     657           0 :         up_read(&ei->i_data_sem);
     658           0 :         ext4_free_ext_path(path);
     659           0 :         return ret;
     660             : }
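
ext4_ext_precache() is reachable from userspace through the
EXT4_IOC_PRECACHE_EXTENTS ioctl. A hedged usage sketch follows; the fallback
ioctl value is copied from the kernel's ext4 headers and is only an assumption
if the installed headers differ:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    #ifndef EXT4_IOC_PRECACHE_EXTENTS
    #define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18)   /* as in the ext4 headers */
    #endif

    int main(int argc, char **argv)
    {
            int fd, ret;

            if (argc != 2) {
                    fprintf(stderr, "usage: %s <file on ext4>\n", argv[0]);
                    return 1;
            }
            fd = open(argv[1], O_RDONLY);
            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            /* Ask the kernel to load the whole extent tree into the
             * inode's extent status cache. */
            ret = ioctl(fd, EXT4_IOC_PRECACHE_EXTENTS);
            if (ret < 0)
                    perror("EXT4_IOC_PRECACHE_EXTENTS");
            close(fd);
            return ret < 0 ? 1 : 0;
    }
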
     661             : 
     662             : #ifdef EXT_DEBUG
     663             : static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
     664             : {
     665             :         int k, l = path->p_depth;
     666             : 
     667             :         ext_debug(inode, "path:");
     668             :         for (k = 0; k <= l; k++, path++) {
     669             :                 if (path->p_idx) {
     670             :                         ext_debug(inode, "  %d->%llu",
     671             :                                   le32_to_cpu(path->p_idx->ei_block),
     672             :                                   ext4_idx_pblock(path->p_idx));
     673             :                 } else if (path->p_ext) {
     674             :                         ext_debug(inode, "  %d:[%d]%d:%llu ",
     675             :                                   le32_to_cpu(path->p_ext->ee_block),
     676             :                                   ext4_ext_is_unwritten(path->p_ext),
     677             :                                   ext4_ext_get_actual_len(path->p_ext),
     678             :                                   ext4_ext_pblock(path->p_ext));
     679             :                 } else
     680             :                         ext_debug(inode, "  []");
     681             :         }
     682             :         ext_debug(inode, "\n");
     683             : }
     684             : 
     685             : static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
     686             : {
     687             :         int depth = ext_depth(inode);
     688             :         struct ext4_extent_header *eh;
     689             :         struct ext4_extent *ex;
     690             :         int i;
     691             : 
     692             :         if (!path)
     693             :                 return;
     694             : 
     695             :         eh = path[depth].p_hdr;
     696             :         ex = EXT_FIRST_EXTENT(eh);
     697             : 
     698             :         ext_debug(inode, "Displaying leaf extents\n");
     699             : 
     700             :         for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
     701             :                 ext_debug(inode, "%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
     702             :                           ext4_ext_is_unwritten(ex),
     703             :                           ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
     704             :         }
     705             :         ext_debug(inode, "\n");
     706             : }
     707             : 
     708             : static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
     709             :                         ext4_fsblk_t newblock, int level)
     710             : {
     711             :         int depth = ext_depth(inode);
     712             :         struct ext4_extent *ex;
     713             : 
     714             :         if (depth != level) {
     715             :                 struct ext4_extent_idx *idx;
     716             :                 idx = path[level].p_idx;
     717             :                 while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) {
     718             :                         ext_debug(inode, "%d: move %d:%llu in new index %llu\n",
     719             :                                   level, le32_to_cpu(idx->ei_block),
     720             :                                   ext4_idx_pblock(idx), newblock);
     721             :                         idx++;
     722             :                 }
     723             : 
     724             :                 return;
     725             :         }
     726             : 
     727             :         ex = path[depth].p_ext;
     728             :         while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) {
     729             :                 ext_debug(inode, "move %d:%llu:[%d]%d in new leaf %llu\n",
     730             :                                 le32_to_cpu(ex->ee_block),
     731             :                                 ext4_ext_pblock(ex),
     732             :                                 ext4_ext_is_unwritten(ex),
     733             :                                 ext4_ext_get_actual_len(ex),
     734             :                                 newblock);
     735             :                 ex++;
     736             :         }
     737             : }
     738             : 
     739             : #else
     740             : #define ext4_ext_show_path(inode, path)
     741             : #define ext4_ext_show_leaf(inode, path)
     742             : #define ext4_ext_show_move(inode, path, newblock, level)
     743             : #endif
     744             : 
     745             : /*
     746             :  * ext4_ext_binsearch_idx:
     747             :  * binary search for the closest index of the given block
     748             :  * the header must be checked before calling this
     749             :  */
     750             : static void
     751    23885347 : ext4_ext_binsearch_idx(struct inode *inode,
     752             :                         struct ext4_ext_path *path, ext4_lblk_t block)
     753             : {
     754    23885347 :         struct ext4_extent_header *eh = path->p_hdr;
     755    23885347 :         struct ext4_extent_idx *r, *l, *m;
     756             : 
     757             : 
     758    23885347 :         ext_debug(inode, "binsearch for %u(idx):  ", block);
     759             : 
     760    23885347 :         l = EXT_FIRST_INDEX(eh) + 1;
     761    23885347 :         r = EXT_LAST_INDEX(eh);
     762    59089066 :         while (l <= r) {
     763    35203719 :                 m = l + (r - l) / 2;
     764    35203719 :                 ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
     765             :                           le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block),
     766             :                           r, le32_to_cpu(r->ei_block));
     767             : 
     768    35203719 :                 if (block < le32_to_cpu(m->ei_block))
     769    19508281 :                         r = m - 1;
     770             :                 else
     771    15695438 :                         l = m + 1;
     772             :         }
     773             : 
     774    23885347 :         path->p_idx = l - 1;
     775    23885347 :         ext_debug(inode, "  -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block),
     776             :                   ext4_idx_pblock(path->p_idx));
     777             : 
     778             : #ifdef CHECK_BINSEARCH
     779             :         {
     780             :                 struct ext4_extent_idx *chix, *ix;
     781             :                 int k;
     782             : 
     783             :                 chix = ix = EXT_FIRST_INDEX(eh);
     784             :                 for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
     785             :                         if (k != 0 && le32_to_cpu(ix->ei_block) <=
     786             :                             le32_to_cpu(ix[-1].ei_block)) {
     787             :                                 printk(KERN_DEBUG "k=%d, ix=0x%p, "
     788             :                                        "first=0x%p\n", k,
     789             :                                        ix, EXT_FIRST_INDEX(eh));
     790             :                                 printk(KERN_DEBUG "%u <= %u\n",
     791             :                                        le32_to_cpu(ix->ei_block),
     792             :                                        le32_to_cpu(ix[-1].ei_block));
     793             :                         }
     794             :                         BUG_ON(k && le32_to_cpu(ix->ei_block)
     795             :                                            <= le32_to_cpu(ix[-1].ei_block));
     796             :                         if (block < le32_to_cpu(ix->ei_block))
     797             :                                 break;
     798             :                         chix = ix;
     799             :                 }
     800             :                 BUG_ON(chix != path->p_idx);
     801             :         }
     802             : #endif
     803             : 
     804    23885347 : }
     805             : 
     806             : /*
     807             :  * ext4_ext_binsearch:
     808             :  * binary search for closest extent of the given block
     809             :  * the header must be checked before calling this
     810             :  */
     811             : static void
     812    21298986 : ext4_ext_binsearch(struct inode *inode,
     813             :                 struct ext4_ext_path *path, ext4_lblk_t block)
     814             : {
     815    21298986 :         struct ext4_extent_header *eh = path->p_hdr;
     816    21298986 :         struct ext4_extent *r, *l, *m;
     817             : 
     818    21298986 :         if (eh->eh_entries == 0) {
     819             :                 /*
     820             :                  * this leaf is empty:
     821             :                  * we get such a leaf in split/add case
     822             :                  */
     823             :                 return;
     824             :         }
     825             : 
     826    18700073 :         ext_debug(inode, "binsearch for %u:  ", block);
     827             : 
     828    18700073 :         l = EXT_FIRST_EXTENT(eh) + 1;
     829    18700073 :         r = EXT_LAST_EXTENT(eh);
     830             : 
     831   121619990 :         while (l <= r) {
     832   102919917 :                 m = l + (r - l) / 2;
     833   102919917 :                 ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
     834             :                           le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block),
     835             :                           r, le32_to_cpu(r->ee_block));
     836             : 
     837   102919917 :                 if (block < le32_to_cpu(m->ee_block))
     838    48091162 :                         r = m - 1;
     839             :                 else
     840    54828755 :                         l = m + 1;
     841             :         }
     842             : 
     843    18700073 :         path->p_ext = l - 1;
     844    18700073 :         ext_debug(inode, "  -> %d:%llu:[%d]%d ",
     845             :                         le32_to_cpu(path->p_ext->ee_block),
     846             :                         ext4_ext_pblock(path->p_ext),
     847             :                         ext4_ext_is_unwritten(path->p_ext),
     848             :                         ext4_ext_get_actual_len(path->p_ext));
     849             : 
     850             : #ifdef CHECK_BINSEARCH
     851             :         {
     852             :                 struct ext4_extent *chex, *ex;
     853             :                 int k;
     854             : 
     855             :                 chex = ex = EXT_FIRST_EXTENT(eh);
     856             :                 for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
     857             :                         BUG_ON(k && le32_to_cpu(ex->ee_block)
     858             :                                           <= le32_to_cpu(ex[-1].ee_block));
     859             :                         if (block < le32_to_cpu(ex->ee_block))
     860             :                                 break;
     861             :                         chex = ex;
     862             :                 }
     863             :                 BUG_ON(chex != path->p_ext);
     864             :         }
     865             : #endif
     866             : 
     867             : }
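
Both binary searches above leave 'l' just past the last entry whose starting
block is <= the requested block, so the answer is 'l - 1' (or the first entry
when the block precedes everything in the node). A standalone illustration of
that invariant over a plain sorted array, not ext4 code:

    #include <stdio.h>

    /* Index of the last element <= key, or 0 if key precedes every
     * element; arr[] is sorted ascending and non-empty. */
    static int find_le(const unsigned int *arr, int n, unsigned int key)
    {
            int l = 1, r = n - 1;   /* start past arr[0], as the kernel code does */

            while (l <= r) {
                    int m = l + (r - l) / 2;

                    if (key < arr[m])
                            r = m - 1;
                    else
                            l = m + 1;
            }
            return l - 1;
    }

    int main(void)
    {
            unsigned int blocks[] = { 0, 8, 32, 100, 512 };

            printf("%d\n", find_le(blocks, 5, 33));    /* 2 -> entry starting at 32 */
            printf("%d\n", find_le(blocks, 5, 512));   /* 4 -> last entry */
            printf("%d\n", find_le(blocks, 5, 3));     /* 0 -> first entry */
            return 0;
    }
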
     868             : 
     869     2605851 : void ext4_ext_tree_init(handle_t *handle, struct inode *inode)
     870             : {
     871     2605851 :         struct ext4_extent_header *eh;
     872             : 
     873     2605851 :         eh = ext_inode_hdr(inode);
     874     2605851 :         eh->eh_depth = 0;
     875     2605851 :         eh->eh_entries = 0;
     876     2605851 :         eh->eh_magic = EXT4_EXT_MAGIC;
     877     2605851 :         eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
     878     2605851 :         eh->eh_generation = 0;
     879     2605851 :         ext4_mark_inode_dirty(handle, inode);
     880     2609995 : }
     881             : 
     882             : struct ext4_ext_path *
     883    21297263 : ext4_find_extent(struct inode *inode, ext4_lblk_t block,
     884             :                  struct ext4_ext_path **orig_path, int flags)
     885             : {
     886    21297263 :         struct ext4_extent_header *eh;
     887    21297263 :         struct buffer_head *bh;
     888    21297263 :         struct ext4_ext_path *path = orig_path ? *orig_path : NULL;
     889    21297263 :         short int depth, i, ppos = 0;
     890    21297263 :         int ret;
     891    21297263 :         gfp_t gfp_flags = GFP_NOFS;
     892             : 
     893    21297263 :         if (flags & EXT4_EX_NOFAIL)
     894      440274 :                 gfp_flags |= __GFP_NOFAIL;
     895             : 
     896    21297263 :         eh = ext_inode_hdr(inode);
     897    21297263 :         depth = ext_depth(inode);
     898    21297263 :         if (depth < 0 || depth > EXT4_MAX_EXTENT_DEPTH) {
     899           0 :                 EXT4_ERROR_INODE(inode, "inode has invalid extent depth: %d",
     900             :                                  depth);
     901           0 :                 ret = -EFSCORRUPTED;
     902           0 :                 goto err;
     903             :         }
     904             : 
     905    21297263 :         if (path) {
     906     2259616 :                 ext4_ext_drop_refs(path);
     907     2259618 :                 if (depth > path[0].p_maxdepth) {
     908           0 :                         kfree(path);
     909           0 :                         *orig_path = path = NULL;
     910             :                 }
     911             :         }
     912    21297265 :         if (!path) {
     913             :                 /* account possible depth increase */
     914    19038373 :                 path = kcalloc(depth + 2, sizeof(struct ext4_ext_path),
     915             :                                 gfp_flags);
     916    19028489 :                 if (unlikely(!path))
     917             :                         return ERR_PTR(-ENOMEM);
     918    19028489 :                 path[0].p_maxdepth = depth + 1;
     919             :         }
     920    21287381 :         path[0].p_hdr = eh;
     921    21287381 :         path[0].p_bh = NULL;
     922             : 
     923    21287381 :         i = depth;
     924    21287381 :         if (!(flags & EXT4_EX_NOCACHE) && depth == 0)
     925     3643164 :                 ext4_cache_extents(inode, eh);
     926             :         /* walk through the tree */
     927    45181851 :         while (i) {
     928    23881281 :                 ext_debug(inode, "depth %d: num %d, max %d\n",
     929             :                           ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
     930             : 
     931    23881281 :                 ext4_ext_binsearch_idx(inode, path + ppos, block);
     932    23886472 :                 path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
     933    23886472 :                 path[ppos].p_depth = i;
     934    23886472 :                 path[ppos].p_ext = NULL;
     935             : 
     936    23886472 :                 bh = read_extent_tree_block(inode, path[ppos].p_idx, --i, flags);
     937    23891321 :                 if (IS_ERR(bh)) {
     938           1 :                         ret = PTR_ERR(bh);
     939           1 :                         goto err;
     940             :                 }
     941             : 
     942    23891320 :                 eh = ext_block_hdr(bh);
     943    23891320 :                 ppos++;
     944    23891320 :                 path[ppos].p_bh = bh;
     945    23891320 :                 path[ppos].p_hdr = eh;
     946             :         }
     947             : 
     948    21300570 :         path[ppos].p_depth = i;
     949    21300570 :         path[ppos].p_ext = NULL;
     950    21300570 :         path[ppos].p_idx = NULL;
     951             : 
     952             :         /* find extent */
     953    21300570 :         ext4_ext_binsearch(inode, path + ppos, block);
     954             :         /* if not an empty leaf */
     955    21297373 :         if (path[ppos].p_ext)
     956    18700004 :                 path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
     957             : 
     958             :         ext4_ext_show_path(inode, path);
     959             : 
     960             :         return path;
     961             : 
     962           1 : err:
     963           1 :         ext4_free_ext_path(path);
     964           1 :         if (orig_path)
     965           0 :                 *orig_path = NULL;
     966           1 :         return ERR_PTR(ret);
     967             : }
     968             : 
     969             : /*
     970             :  * ext4_ext_insert_index:
     971             :  * insert new index [@logical;@ptr] into the block at @curp;
     972             :  * check where to insert: before @curp or after @curp
     973             :  */
     974        6464 : static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
     975             :                                  struct ext4_ext_path *curp,
     976             :                                  int logical, ext4_fsblk_t ptr)
     977             : {
     978        6464 :         struct ext4_extent_idx *ix;
     979        6464 :         int len, err;
     980             : 
     981        6464 :         err = ext4_ext_get_access(handle, inode, curp);
     982        6464 :         if (err)
     983             :                 return err;
     984             : 
     985        6464 :         if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
     986           0 :                 EXT4_ERROR_INODE(inode,
     987             :                                  "logical %d == ei_block %d!",
     988             :                                  logical, le32_to_cpu(curp->p_idx->ei_block));
     989           0 :                 return -EFSCORRUPTED;
     990             :         }
     991             : 
     992        6464 :         if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
     993             :                              >= le16_to_cpu(curp->p_hdr->eh_max))) {
     994           0 :                 EXT4_ERROR_INODE(inode,
     995             :                                  "eh_entries %d >= eh_max %d!",
     996             :                                  le16_to_cpu(curp->p_hdr->eh_entries),
     997             :                                  le16_to_cpu(curp->p_hdr->eh_max));
     998           0 :                 return -EFSCORRUPTED;
     999             :         }
    1000             : 
    1001        6464 :         if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
    1002             :                 /* insert after */
    1003        6464 :                 ext_debug(inode, "insert new index %d after: %llu\n",
    1004             :                           logical, ptr);
    1005        6464 :                 ix = curp->p_idx + 1;
    1006             :         } else {
    1007             :                 /* insert before */
    1008             :                 ext_debug(inode, "insert new index %d before: %llu\n",
    1009             :                           logical, ptr);
    1010             :                 ix = curp->p_idx;
    1011             :         }
    1012             : 
    1013        6464 :         len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
    1014        6464 :         BUG_ON(len < 0);
    1015        6464 :         if (len > 0) {
    1016        4442 :                 ext_debug(inode, "insert new index %d: "
    1017             :                                 "move %d indices from 0x%p to 0x%p\n",
    1018             :                                 logical, len, ix, ix + 1);
    1019        8884 :                 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
    1020             :         }
    1021             : 
    1022        6464 :         if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
    1023           0 :                 EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
    1024           0 :                 return -EFSCORRUPTED;
    1025             :         }
    1026             : 
    1027        6464 :         ix->ei_block = cpu_to_le32(logical);
    1028        6464 :         ext4_idx_store_pblock(ix, ptr);
    1029        6464 :         le16_add_cpu(&curp->p_hdr->eh_entries, 1);
    1030             : 
    1031        6464 :         if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
    1032           0 :                 EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
    1033           0 :                 return -EFSCORRUPTED;
    1034             :         }
    1035             : 
    1036        6464 :         err = ext4_ext_dirty(handle, inode, curp);
    1037        6464 :         ext4_std_error(inode->i_sb, err);
    1038             : 
    1039             :         return err;
    1040             : }
    1041             : 
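/*
 * A minimal, self-contained sketch (not taken from extents.c; all names here
 * are illustrative) of the shift-and-insert pattern ext4_ext_insert_index()
 * uses above: every index entry at or after the insertion point is moved one
 * slot to the right with memmove() before the new entry is written.
 */
#include <assert.h>
#include <string.h>

struct toy_idx { unsigned int lblk; unsigned long long pblk; };

static void toy_insert_idx(struct toy_idx *idx, unsigned int *nr,
                           unsigned int pos, struct toy_idx new_idx)
{
        /* shift idx[pos..*nr-1] up by one slot; memmove copes with overlap */
        memmove(&idx[pos + 1], &idx[pos], (*nr - pos) * sizeof(idx[0]));
        idx[pos] = new_idx;
        (*nr)++;
}

int main(void)
{
        struct toy_idx idx[4] = { { 0, 100 }, { 32, 200 } };
        unsigned int nr = 2;
        struct toy_idx mid = { 16, 300 };

        toy_insert_idx(idx, &nr, 1, mid);       /* insert between 0 and 32 */
        assert(nr == 3 && idx[1].lblk == 16 && idx[2].lblk == 32);
        return 0;
}
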
    1042             : /*
    1043             :  * ext4_ext_split:
    1044             :  * inserts new subtree into the path, using free index entry
    1045             :  * at depth @at:
    1046             :  * - allocates all needed blocks (new leaf and all intermediate index blocks)
    1047             :  * - decides where to split
    1048             :  * - moves the remaining extents and index entries (to the right of the
    1049             :  *   split point) into the newly allocated blocks
    1050             :  * - initializes subtree
    1051             :  */
    1052        6466 : static int ext4_ext_split(handle_t *handle, struct inode *inode,
    1053             :                           unsigned int flags,
    1054             :                           struct ext4_ext_path *path,
    1055             :                           struct ext4_extent *newext, int at)
    1056             : {
    1057        6466 :         struct buffer_head *bh = NULL;
    1058        6466 :         int depth = ext_depth(inode);
    1059        6466 :         struct ext4_extent_header *neh;
    1060        6466 :         struct ext4_extent_idx *fidx;
    1061        6466 :         int i = at, k, m, a;
    1062        6466 :         ext4_fsblk_t newblock, oldblock;
    1063        6466 :         __le32 border;
    1064        6466 :         ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
    1065        6466 :         gfp_t gfp_flags = GFP_NOFS;
    1066        6466 :         int err = 0;
    1067        6466 :         size_t ext_size = 0;
    1068             : 
    1069        6466 :         if (flags & EXT4_EX_NOFAIL)
    1070           0 :                 gfp_flags |= __GFP_NOFAIL;
    1071             : 
    1072             :         /* make decision: where to split? */
    1073             :         /* FIXME: now decision is simplest: at current extent */
    1074             : 
    1075             :         /* if the current leaf will be split, then we should use
    1076             :          * the border from the split point */
    1077        6466 :         if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
    1078           0 :                 EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
    1079           0 :                 return -EFSCORRUPTED;
    1080             :         }
    1081        6466 :         if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
    1082        5835 :                 border = path[depth].p_ext[1].ee_block;
    1083        5835 :                 ext_debug(inode, "leaf will be split."
    1084             :                                 " next leaf starts at %d\n",
    1085             :                                   le32_to_cpu(border));
    1086             :         } else {
    1087         631 :                 border = newext->ee_block;
    1088         631 :                 ext_debug(inode, "leaf will be added."
    1089             :                                 " next leaf starts at %d\n",
    1090             :                                 le32_to_cpu(border));
    1091             :         }
    1092             : 
    1093             :         /*
    1094             :          * If an error occurs, we stop processing and mark the
    1095             :          * filesystem read-only: the index won't be inserted and
    1096             :          * the tree will stay in a consistent state. The next
    1097             :          * mount will repair the buffers too.
    1098             :          */
    1099             : 
    1100             :         /*
    1101             :          * Get an array to track all allocated blocks.
    1102             :          * We need it so that on error we can free those
    1103             :          * blocks again.
    1104             :          */
    1105        6466 :         ablocks = kcalloc(depth, sizeof(ext4_fsblk_t), gfp_flags);
    1106        6466 :         if (!ablocks)
    1107             :                 return -ENOMEM;
    1108             : 
    1109             :         /* allocate all needed blocks */
    1110             :         ext_debug(inode, "allocate %d blocks for indexes/leaf\n", depth - at);
    1111       12936 :         for (a = 0; a < depth - at; a++) {
    1112        6472 :                 newblock = ext4_ext_new_meta_block(handle, inode, path,
    1113             :                                                    newext, &err, flags);
    1114        6472 :                 if (newblock == 0)
    1115           2 :                         goto cleanup;
    1116        6470 :                 ablocks[a] = newblock;
    1117             :         }
    1118             : 
    1119             :         /* initialize new leaf */
    1120        6464 :         newblock = ablocks[--a];
    1121        6464 :         if (unlikely(newblock == 0)) {
    1122           0 :                 EXT4_ERROR_INODE(inode, "newblock == 0!");
    1123           0 :                 err = -EFSCORRUPTED;
    1124           0 :                 goto cleanup;
    1125             :         }
    1126        6464 :         bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
    1127        6464 :         if (unlikely(!bh)) {
    1128           0 :                 err = -ENOMEM;
    1129           0 :                 goto cleanup;
    1130             :         }
    1131        6464 :         lock_buffer(bh);
    1132             : 
    1133        6464 :         err = ext4_journal_get_create_access(handle, inode->i_sb, bh,
    1134             :                                              EXT4_JTR_NONE);
    1135        6464 :         if (err)
    1136           0 :                 goto cleanup;
    1137             : 
    1138        6464 :         neh = ext_block_hdr(bh);
    1139        6464 :         neh->eh_entries = 0;
    1140        6464 :         neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
    1141        6464 :         neh->eh_magic = EXT4_EXT_MAGIC;
    1142        6464 :         neh->eh_depth = 0;
    1143        6464 :         neh->eh_generation = 0;
    1144             : 
    1145             :         /* move remainder of path[depth] to the new leaf */
    1146        6464 :         if (unlikely(path[depth].p_hdr->eh_entries !=
    1147             :                      path[depth].p_hdr->eh_max)) {
    1148           0 :                 EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
    1149             :                                  path[depth].p_hdr->eh_entries,
    1150             :                                  path[depth].p_hdr->eh_max);
    1151           0 :                 err = -EFSCORRUPTED;
    1152           0 :                 goto cleanup;
    1153             :         }
    1154             :         /* start copy from next extent */
    1155        6464 :         m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++;
    1156        6464 :         ext4_ext_show_move(inode, path, newblock, depth);
    1157        6464 :         if (m) {
    1158        5833 :                 struct ext4_extent *ex;
    1159        5833 :                 ex = EXT_FIRST_EXTENT(neh);
    1160       11666 :                 memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m);
    1161        5833 :                 le16_add_cpu(&neh->eh_entries, m);
    1162             :         }
    1163             : 
    1164             :         /* zero out unused area in the extent block */
    1165        6464 :         ext_size = sizeof(struct ext4_extent_header) +
    1166        6464 :                 sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries);
    1167        6464 :         memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size);
    1168        6464 :         ext4_extent_block_csum_set(inode, neh);
    1169        6464 :         set_buffer_uptodate(bh);
    1170        6464 :         unlock_buffer(bh);
    1171             : 
    1172        6464 :         err = ext4_handle_dirty_metadata(handle, inode, bh);
    1173        6464 :         if (err)
    1174           0 :                 goto cleanup;
    1175        6464 :         brelse(bh);
    1176        6464 :         bh = NULL;
    1177             : 
    1178             :         /* correct old leaf */
    1179        6464 :         if (m) {
    1180        5833 :                 err = ext4_ext_get_access(handle, inode, path + depth);
    1181        5833 :                 if (err)
    1182           0 :                         goto cleanup;
    1183        5833 :                 le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
    1184        5833 :                 err = ext4_ext_dirty(handle, inode, path + depth);
    1185        5833 :                 if (err)
    1186           0 :                         goto cleanup;
    1187             : 
    1188             :         }
    1189             : 
    1190             :         /* create intermediate indexes */
    1191        6464 :         k = depth - at - 1;
    1192        6464 :         if (unlikely(k < 0)) {
    1193           0 :                 EXT4_ERROR_INODE(inode, "k %d < 0!", k);
    1194           0 :                 err = -EFSCORRUPTED;
    1195           0 :                 goto cleanup;
    1196             :         }
    1197        6464 :         if (k)
    1198             :                 ext_debug(inode, "create %d intermediate indices\n", k);
    1199             :         /* insert new index into current index block */
    1200             :         /* current depth stored in i var */
    1201        6464 :         i = depth - 1;
    1202        6470 :         while (k--) {
    1203           6 :                 oldblock = newblock;
    1204           6 :                 newblock = ablocks[--a];
    1205           6 :                 bh = sb_getblk(inode->i_sb, newblock);
    1206           6 :                 if (unlikely(!bh)) {
    1207           0 :                         err = -ENOMEM;
    1208           0 :                         goto cleanup;
    1209             :                 }
    1210           6 :                 lock_buffer(bh);
    1211             : 
    1212           6 :                 err = ext4_journal_get_create_access(handle, inode->i_sb, bh,
    1213             :                                                      EXT4_JTR_NONE);
    1214           6 :                 if (err)
    1215           0 :                         goto cleanup;
    1216             : 
    1217           6 :                 neh = ext_block_hdr(bh);
    1218           6 :                 neh->eh_entries = cpu_to_le16(1);
    1219           6 :                 neh->eh_magic = EXT4_EXT_MAGIC;
    1220           6 :                 neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
    1221           6 :                 neh->eh_depth = cpu_to_le16(depth - i);
    1222           6 :                 neh->eh_generation = 0;
    1223           6 :                 fidx = EXT_FIRST_INDEX(neh);
    1224           6 :                 fidx->ei_block = border;
    1225           6 :                 ext4_idx_store_pblock(fidx, oldblock);
    1226             : 
    1227           6 :                 ext_debug(inode, "int.index at %d (block %llu): %u -> %llu\n",
    1228             :                                 i, newblock, le32_to_cpu(border), oldblock);
    1229             : 
    1230             :                 /* move remainder of path[i] to the new index block */
    1231           6 :                 if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
    1232             :                                         EXT_LAST_INDEX(path[i].p_hdr))) {
    1233           0 :                         EXT4_ERROR_INODE(inode,
    1234             :                                          "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
    1235             :                                          le32_to_cpu(path[i].p_ext->ee_block));
    1236           0 :                         err = -EFSCORRUPTED;
    1237           0 :                         goto cleanup;
    1238             :                 }
    1239             :                 /* start copy indexes */
    1240           6 :                 m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++;
    1241           6 :                 ext_debug(inode, "cur 0x%p, last 0x%p\n", path[i].p_idx,
    1242             :                                 EXT_MAX_INDEX(path[i].p_hdr));
    1243           6 :                 ext4_ext_show_move(inode, path, newblock, i);
    1244           6 :                 if (m) {
    1245           6 :                         memmove(++fidx, path[i].p_idx,
    1246             :                                 sizeof(struct ext4_extent_idx) * m);
    1247           6 :                         le16_add_cpu(&neh->eh_entries, m);
    1248             :                 }
    1249             :                 /* zero out unused area in the extent block */
    1250           6 :                 ext_size = sizeof(struct ext4_extent_header) +
    1251           6 :                    (sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries));
    1252           6 :                 memset(bh->b_data + ext_size, 0,
    1253             :                         inode->i_sb->s_blocksize - ext_size);
    1254           6 :                 ext4_extent_block_csum_set(inode, neh);
    1255           6 :                 set_buffer_uptodate(bh);
    1256           6 :                 unlock_buffer(bh);
    1257             : 
    1258           6 :                 err = ext4_handle_dirty_metadata(handle, inode, bh);
    1259           6 :                 if (err)
    1260           0 :                         goto cleanup;
    1261           6 :                 brelse(bh);
    1262           6 :                 bh = NULL;
    1263             : 
    1264             :                 /* correct old index */
    1265           6 :                 if (m) {
    1266           6 :                         err = ext4_ext_get_access(handle, inode, path + i);
    1267           6 :                         if (err)
    1268           0 :                                 goto cleanup;
    1269           6 :                         le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
    1270           6 :                         err = ext4_ext_dirty(handle, inode, path + i);
    1271           6 :                         if (err)
    1272           0 :                                 goto cleanup;
    1273             :                 }
    1274             : 
    1275           6 :                 i--;
    1276             :         }
    1277             : 
    1278             :         /* insert new index */
    1279        6464 :         err = ext4_ext_insert_index(handle, inode, path + at,
    1280             :                                     le32_to_cpu(border), newblock);
    1281             : 
    1282           0 : cleanup:
    1283        6466 :         if (bh) {
    1284           0 :                 if (buffer_locked(bh))
    1285           0 :                         unlock_buffer(bh);
    1286           0 :                 brelse(bh);
    1287             :         }
    1288             : 
    1289        6466 :         if (err) {
    1290             :                 /* free all allocated blocks in error case */
    1291           6 :                 for (i = 0; i < depth; i++) {
    1292           4 :                         if (!ablocks[i])
    1293           4 :                                 continue;
    1294           0 :                         ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
    1295             :                                          EXT4_FREE_BLOCKS_METADATA);
    1296             :                 }
    1297             :         }
    1298        6466 :         kfree(ablocks);
    1299             : 
    1300        6466 :         return err;
    1301             : }
    1302             : 
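/*
 * A toy, userspace-only sketch (illustrative names; not kernel code) of the
 * core move performed by ext4_ext_split() above: everything to the right of
 * the split point is copied into the freshly allocated leaf, and the old
 * leaf's entry count shrinks by the number of moved entries.
 */
#include <assert.h>
#include <string.h>

struct toy_ext { unsigned int lblk; unsigned int len; };

static unsigned int toy_split(struct toy_ext *old, unsigned int *old_nr,
                              unsigned int split, struct toy_ext *fresh)
{
        unsigned int moved = *old_nr - split;

        /* move old[split..*old_nr-1] into the new leaf */
        memmove(fresh, &old[split], moved * sizeof(old[0]));
        *old_nr = split;
        return moved;
}

int main(void)
{
        struct toy_ext leaf[4] = { { 0, 8 }, { 8, 8 }, { 16, 8 }, { 24, 8 } };
        struct toy_ext fresh[4];
        unsigned int nr = 4;
        unsigned int moved = toy_split(leaf, &nr, 2, fresh);

        assert(moved == 2 && nr == 2);
        assert(fresh[0].lblk == 16 && fresh[1].lblk == 24);
        return 0;
}
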
    1303             : /*
    1304             :  * ext4_ext_grow_indepth:
    1305             :  * implements tree growing procedure:
    1306             :  * - allocates new block
    1307             :  * - moves top-level data (index block or leaf) into the new block
    1308             :  * - initializes new top-level, creating index that points to the
    1309             :  *   just created block
    1310             :  */
    1311       62164 : static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
    1312             :                                  unsigned int flags)
    1313             : {
    1314       62164 :         struct ext4_extent_header *neh;
    1315       62164 :         struct buffer_head *bh;
    1316       62164 :         ext4_fsblk_t newblock, goal = 0;
    1317       62164 :         struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
    1318       62164 :         int err = 0;
    1319       62164 :         size_t ext_size = 0;
    1320             : 
    1321             :         /* Try to prepend new index to old one */
    1322       62164 :         if (ext_depth(inode))
    1323          54 :                 goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode)));
    1324       62164 :         if (goal > le32_to_cpu(es->s_first_data_block)) {
    1325          54 :                 flags |= EXT4_MB_HINT_TRY_GOAL;
    1326          54 :                 goal--;
    1327             :         } else
    1328       62110 :                 goal = ext4_inode_to_goal_block(inode);
    1329       62164 :         newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
    1330             :                                         NULL, &err);
    1331       62166 :         if (newblock == 0)
    1332         293 :                 return err;
    1333             : 
    1334       61873 :         bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
    1335       61873 :         if (unlikely(!bh))
    1336             :                 return -ENOMEM;
    1337       61873 :         lock_buffer(bh);
    1338             : 
    1339       61872 :         err = ext4_journal_get_create_access(handle, inode->i_sb, bh,
    1340             :                                              EXT4_JTR_NONE);
    1341       61870 :         if (err) {
    1342           0 :                 unlock_buffer(bh);
    1343           0 :                 goto out;
    1344             :         }
    1345             : 
    1346       61870 :         ext_size = sizeof(EXT4_I(inode)->i_data);
    1347             :         /* move top-level index/leaf into new block */
    1348      123740 :         memmove(bh->b_data, EXT4_I(inode)->i_data, ext_size);
    1349             :         /* zero out unused area in the extent block */
    1350       61870 :         memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size);
    1351             : 
    1352             :         /* set size of new block */
    1353       61870 :         neh = ext_block_hdr(bh);
    1354             :         /* the old root could have indexes or leaves,
    1355             :          * so calculate eh_max the right way */
    1356       61870 :         if (ext_depth(inode))
    1357          54 :                 neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
    1358             :         else
    1359       61816 :                 neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
    1360       61870 :         neh->eh_magic = EXT4_EXT_MAGIC;
    1361       61870 :         ext4_extent_block_csum_set(inode, neh);
    1362       61869 :         set_buffer_uptodate(bh);
    1363       61873 :         set_buffer_verified(bh);
    1364       61873 :         unlock_buffer(bh);
    1365             : 
    1366       61872 :         err = ext4_handle_dirty_metadata(handle, inode, bh);
    1367       61873 :         if (err)
    1368           0 :                 goto out;
    1369             : 
    1370             :         /* Update top-level index: num,max,pointer */
    1371       61873 :         neh = ext_inode_hdr(inode);
    1372       61873 :         neh->eh_entries = cpu_to_le16(1);
    1373       61873 :         ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
    1374       61873 :         if (neh->eh_depth == 0) {
    1375             :                 /* Root extent block becomes index block */
    1376       61819 :                 neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
    1377       61819 :                 EXT_FIRST_INDEX(neh)->ei_block =
    1378             :                         EXT_FIRST_EXTENT(neh)->ee_block;
    1379             :         }
    1380       61873 :         ext_debug(inode, "new root: num %d(%d), lblock %d, ptr %llu\n",
    1381             :                   le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
    1382             :                   le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
    1383             :                   ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
    1384             : 
    1385       61873 :         le16_add_cpu(&neh->eh_depth, 1);
    1386       61873 :         err = ext4_mark_inode_dirty(handle, inode);
    1387       61872 : out:
    1388       61872 :         brelse(bh);
    1389             : 
    1390       61871 :         return err;
    1391             : }
    1392             : 
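/*
 * A toy model (illustrative structures only, not the on-disk ext4 format) of
 * what ext4_ext_grow_indepth() does above: the current root's entries move
 * into a newly allocated block, the root is rewritten as a single index
 * entry pointing at that block, and the tree depth increases by one.
 */
#include <assert.h>

#define TOY_ROOT_MAX 4

struct toy_node { unsigned int nr; unsigned int first_key[TOY_ROOT_MAX]; };
struct toy_root { unsigned int depth; struct toy_node node; };

static void toy_grow(struct toy_root *root, struct toy_node *new_block)
{
        /* move the old root level into the new block ... */
        *new_block = root->node;
        /* ... and keep a single index entry in the root pointing at it */
        root->node.nr = 1;
        root->node.first_key[0] = new_block->first_key[0];
        root->depth++;
}

int main(void)
{
        struct toy_root root = { 0, { 4, { 0, 8, 16, 24 } } };
        struct toy_node child;

        toy_grow(&root, &child);
        assert(root.depth == 1 && root.node.nr == 1 && child.nr == 4);
        return 0;
}
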
    1393             : /*
    1394             :  * ext4_ext_create_new_leaf:
    1395             :  * finds an empty index and adds a new leaf.
    1396             :  * if no free index is found, it grows the tree in depth.
    1397             :  */
    1398       68576 : static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
    1399             :                                     unsigned int mb_flags,
    1400             :                                     unsigned int gb_flags,
    1401             :                                     struct ext4_ext_path **ppath,
    1402             :                                     struct ext4_extent *newext)
    1403             : {
    1404       68576 :         struct ext4_ext_path *path = *ppath;
    1405       68576 :         struct ext4_ext_path *curp;
    1406       68576 :         int depth, i, err = 0;
    1407             : 
    1408       68630 : repeat:
    1409       68630 :         i = depth = ext_depth(inode);
    1410             : 
    1411             :         /* walk up the tree and look for a free index entry */
    1412       68630 :         curp = path + depth;
    1413       75157 :         while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
    1414        6527 :                 i--;
    1415        6527 :                 curp--;
    1416             :         }
    1417             : 
    1418             :         /* we use already allocated block for index block,
    1419             :          * so subsequent data blocks should be contiguous */
    1420       68630 :         if (EXT_HAS_FREE_INDEX(curp)) {
    1421             :                 /* if we found index with free entry, then use that
    1422             :                  * entry: create all needed subtree and add new leaf */
    1423        6466 :                 err = ext4_ext_split(handle, inode, mb_flags, path, newext, i);
    1424        6466 :                 if (err)
    1425           2 :                         goto out;
    1426             : 
    1427             :                 /* refill path */
    1428        6464 :                 path = ext4_find_extent(inode,
    1429        6464 :                                     (ext4_lblk_t)le32_to_cpu(newext->ee_block),
    1430             :                                     ppath, gb_flags);
    1431        6464 :                 if (IS_ERR(path))
    1432           0 :                         err = PTR_ERR(path);
    1433             :         } else {
    1434             :                 /* tree is full, time to grow in depth */
    1435       62164 :                 err = ext4_ext_grow_indepth(handle, inode, mb_flags);
    1436       62164 :                 if (err)
    1437         293 :                         goto out;
    1438             : 
    1439             :                 /* refill path */
    1440       61871 :                 path = ext4_find_extent(inode,
    1441       61871 :                                    (ext4_lblk_t)le32_to_cpu(newext->ee_block),
    1442             :                                     ppath, gb_flags);
    1443       61869 :                 if (IS_ERR(path)) {
    1444           0 :                         err = PTR_ERR(path);
    1445           0 :                         goto out;
    1446             :                 }
    1447             : 
    1448             :                 /*
    1449             :                  * only the first grow (depth 0 -> 1) produces free space;
    1450             :                  * in all other cases we have to split the grown tree
    1451             :                  */
    1452       61869 :                 depth = ext_depth(inode);
    1453       61869 :                 if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
    1454             :                         /* now we need to split */
    1455          54 :                         goto repeat;
    1456             :                 }
    1457             :         }
    1458             : 
    1459       61815 : out:
    1460       68574 :         return err;
    1461             : }
    1462             : 
    1463             : /*
    1464             :  * Search the closest allocated block to the left of *logical
    1465             :  * and return it at @logical + its physical address at @phys.
    1466             :  * If *logical is the smallest allocated block, the function
    1467             :  * returns 0 at @phys.
    1468             :  * The return value is 0 (success) or an error code.
    1469             :  */
    1470     3814858 : static int ext4_ext_search_left(struct inode *inode,
    1471             :                                 struct ext4_ext_path *path,
    1472             :                                 ext4_lblk_t *logical, ext4_fsblk_t *phys)
    1473             : {
    1474     3814858 :         struct ext4_extent_idx *ix;
    1475     3814858 :         struct ext4_extent *ex;
    1476     3814858 :         int depth, ee_len;
    1477             : 
    1478     3814858 :         if (unlikely(path == NULL)) {
    1479           0 :                 EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
    1480           0 :                 return -EFSCORRUPTED;
    1481             :         }
    1482     3814858 :         depth = path->p_depth;
    1483     3814858 :         *phys = 0;
    1484             : 
    1485     3814858 :         if (depth == 0 && path->p_ext == NULL)
    1486             :                 return 0;
    1487             : 
    1488             :         /* usually the extent in the path covers blocks smaller
    1489             :          * than *logical, but it can happen that the extent is
    1490             :          * the first one in the file */
    1491             : 
    1492     3033829 :         ex = path[depth].p_ext;
    1493     3033829 :         ee_len = ext4_ext_get_actual_len(ex);
    1494     3033829 :         if (*logical < le32_to_cpu(ex->ee_block)) {
    1495       55613 :                 if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
    1496           0 :                         EXT4_ERROR_INODE(inode,
    1497             :                                          "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
    1498             :                                          *logical, le32_to_cpu(ex->ee_block));
    1499           0 :                         return -EFSCORRUPTED;
    1500             :                 }
    1501       77731 :                 while (--depth >= 0) {
    1502       22118 :                         ix = path[depth].p_idx;
    1503       22118 :                         if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
    1504           0 :                                 EXT4_ERROR_INODE(inode,
    1505             :                                   "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
    1506             :                                   ix != NULL ? le32_to_cpu(ix->ei_block) : 0,
    1507             :                                   le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block),
    1508             :                                   depth);
    1509           0 :                                 return -EFSCORRUPTED;
    1510             :                         }
    1511             :                 }
    1512             :                 return 0;
    1513             :         }
    1514             : 
    1515     2978216 :         if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
    1516           0 :                 EXT4_ERROR_INODE(inode,
    1517             :                                  "logical %d < ee_block %d + ee_len %d!",
    1518             :                                  *logical, le32_to_cpu(ex->ee_block), ee_len);
    1519           0 :                 return -EFSCORRUPTED;
    1520             :         }
    1521             : 
    1522     2978216 :         *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
    1523     2978216 :         *phys = ext4_ext_pblock(ex) + ee_len - 1;
    1524     2978216 :         return 0;
    1525             : }
    1526             : 
    1527             : /*
    1528             :  * Search the closest allocated block to the right of *logical
    1529             :  * and return it at @logical + its physical address at @phys.
    1530             :  * If none exists, return 0 and set @phys to 0. Return 1 if an
    1531             :  * allocated block was found, in which case ret_ex is valid.
    1532             :  * Otherwise return a (< 0) error code.
    1533             :  */
    1534     3814970 : static int ext4_ext_search_right(struct inode *inode,
    1535             :                                  struct ext4_ext_path *path,
    1536             :                                  ext4_lblk_t *logical, ext4_fsblk_t *phys,
    1537             :                                  struct ext4_extent *ret_ex)
    1538             : {
    1539     3814970 :         struct buffer_head *bh = NULL;
    1540     3814970 :         struct ext4_extent_header *eh;
    1541     3814970 :         struct ext4_extent_idx *ix;
    1542     3814970 :         struct ext4_extent *ex;
    1543     3814970 :         int depth;      /* Note, NOT eh_depth; depth from top of tree */
    1544     3814970 :         int ee_len;
    1545             : 
    1546     3814970 :         if (unlikely(path == NULL)) {
    1547           0 :                 EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
    1548           0 :                 return -EFSCORRUPTED;
    1549             :         }
    1550     3814970 :         depth = path->p_depth;
    1551     3814970 :         *phys = 0;
    1552             : 
    1553     3814970 :         if (depth == 0 && path->p_ext == NULL)
    1554             :                 return 0;
    1555             : 
    1556             :         /* usually the extent in the path covers blocks smaller
    1557             :          * than *logical, but it can happen that the extent is
    1558             :          * the first one in the file */
    1559             : 
    1560     3033912 :         ex = path[depth].p_ext;
    1561     3033912 :         ee_len = ext4_ext_get_actual_len(ex);
    1562     3033912 :         if (*logical < le32_to_cpu(ex->ee_block)) {
    1563       55613 :                 if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
    1564           0 :                         EXT4_ERROR_INODE(inode,
    1565             :                                          "first_extent(path[%d].p_hdr) != ex",
    1566             :                                          depth);
    1567           0 :                         return -EFSCORRUPTED;
    1568             :                 }
    1569       77731 :                 while (--depth >= 0) {
    1570       22118 :                         ix = path[depth].p_idx;
    1571       22118 :                         if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
    1572           0 :                                 EXT4_ERROR_INODE(inode,
    1573             :                                                  "ix != EXT_FIRST_INDEX *logical %d!",
    1574             :                                                  *logical);
    1575           0 :                                 return -EFSCORRUPTED;
    1576             :                         }
    1577             :                 }
    1578       55613 :                 goto found_extent;
    1579             :         }
    1580             : 
    1581     2978299 :         if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
    1582           0 :                 EXT4_ERROR_INODE(inode,
    1583             :                                  "logical %d < ee_block %d + ee_len %d!",
    1584             :                                  *logical, le32_to_cpu(ex->ee_block), ee_len);
    1585           0 :                 return -EFSCORRUPTED;
    1586             :         }
    1587             : 
    1588     2978299 :         if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
    1589             :                 /* next allocated block in this leaf */
    1590     1995090 :                 ex++;
    1591     1995090 :                 goto found_extent;
    1592             :         }
    1593             : 
    1594             :         /* go up and search for index to the right */
    1595     1944984 :         while (--depth >= 0) {
    1596      985707 :                 ix = path[depth].p_idx;
    1597      985707 :                 if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
    1598       23932 :                         goto got_index;
    1599             :         }
    1600             : 
    1601             :         /* we've gone up to the root and found no index to the right */
    1602             :         return 0;
    1603             : 
    1604             : got_index:
    1605             :         /* we've found index to the right, let's
    1606             :          * follow it and find the closest allocated
    1607             :          * block to the right */
    1608       23932 :         ix++;
    1609       23951 :         while (++depth < path->p_depth) {
    1610             :                 /* subtract from p_depth to get proper eh_depth */
    1611          19 :                 bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0);
    1612          19 :                 if (IS_ERR(bh))
    1613           0 :                         return PTR_ERR(bh);
    1614          19 :                 eh = ext_block_hdr(bh);
    1615          19 :                 ix = EXT_FIRST_INDEX(eh);
    1616          19 :                 put_bh(bh);
    1617             :         }
    1618             : 
    1619       23932 :         bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0);
    1620       23932 :         if (IS_ERR(bh))
    1621           0 :                 return PTR_ERR(bh);
    1622       23932 :         eh = ext_block_hdr(bh);
    1623       23932 :         ex = EXT_FIRST_EXTENT(eh);
    1624     2074635 : found_extent:
    1625     2074635 :         *logical = le32_to_cpu(ex->ee_block);
    1626     2074635 :         *phys = ext4_ext_pblock(ex);
    1627     2074635 :         if (ret_ex)
    1628     2074635 :                 *ret_ex = *ex;
    1629     2074635 :         if (bh)
    1630       23962 :                 put_bh(bh);
    1631             :         return 1;
    1632             : }
    1633             : 
    1634             : /*
    1635             :  * ext4_ext_next_allocated_block:
    1636             :  * returns allocated block in subsequent extent or EXT_MAX_BLOCKS.
    1637             :  * NOTE: it treats the block number from an index entry as an
    1638             :  * allocated block. Thus, index entries have to be consistent
    1639             :  * with the leaves.
    1640             :  */
    1641             : ext4_lblk_t
    1642     4535803 : ext4_ext_next_allocated_block(struct ext4_ext_path *path)
    1643             : {
    1644     4535803 :         int depth;
    1645             : 
    1646     4535803 :         BUG_ON(path == NULL);
    1647     4535803 :         depth = path->p_depth;
    1648             : 
    1649     4535803 :         if (depth == 0 && path->p_ext == NULL)
    1650             :                 return EXT_MAX_BLOCKS;
    1651             : 
    1652     7522456 :         while (depth >= 0) {
    1653     5987265 :                 struct ext4_ext_path *p = &path[depth];
    1654             : 
    1655     5987265 :                 if (depth == path->p_depth) {
    1656             :                         /* leaf */
    1657     4535909 :                         if (p->p_ext && p->p_ext != EXT_LAST_EXTENT(p->p_hdr))
    1658     2976546 :                                 return le32_to_cpu(p->p_ext[1].ee_block);
    1659             :                 } else {
    1660             :                         /* index */
    1661     1451356 :                         if (p->p_idx != EXT_LAST_INDEX(p->p_hdr))
    1662       24066 :                                 return le32_to_cpu(p->p_idx[1].ei_block);
    1663             :                 }
    1664     2986653 :                 depth--;
    1665             :         }
    1666             : 
    1667             :         return EXT_MAX_BLOCKS;
    1668             : }
    1669             : 
    1670             : /*
    1671             :  * ext4_ext_next_leaf_block:
    1672             :  * returns first allocated block from next leaf or EXT_MAX_BLOCKS
    1673             :  */
    1674       27194 : static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
    1675             : {
    1676       27194 :         int depth;
    1677             : 
    1678       27194 :         BUG_ON(path == NULL);
    1679       27194 :         depth = path->p_depth;
    1680             : 
    1681             :         /* zero-tree has no leaf blocks at all */
    1682       27194 :         if (depth == 0)
    1683             :                 return EXT_MAX_BLOCKS;
    1684             : 
    1685             :         /* go to index block */
    1686        1813 :         depth--;
    1687             : 
    1688        3037 :         while (depth >= 0) {
    1689        2409 :                 if (path[depth].p_idx !=
    1690        2409 :                                 EXT_LAST_INDEX(path[depth].p_hdr))
    1691        1185 :                         return (ext4_lblk_t)
    1692             :                                 le32_to_cpu(path[depth].p_idx[1].ei_block);
    1693        1224 :                 depth--;
    1694             :         }
    1695             : 
    1696             :         return EXT_MAX_BLOCKS;
    1697             : }
    1698             : 
    1699             : /*
    1700             :  * ext4_ext_correct_indexes:
    1701             :  * if a leaf gets modified and the modified extent is the first one in
    1702             :  * the leaf, then we have to correct all the indexes above it.
    1703             :  * TODO: do we need to correct tree in all cases?
    1704             :  */
    1705     6102224 : static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
    1706             :                                 struct ext4_ext_path *path)
    1707             : {
    1708     6102224 :         struct ext4_extent_header *eh;
    1709     6102224 :         int depth = ext_depth(inode);
    1710     6102224 :         struct ext4_extent *ex;
    1711     6102224 :         __le32 border;
    1712     6102224 :         int k, err = 0;
    1713             : 
    1714     6102224 :         eh = path[depth].p_hdr;
    1715     6102224 :         ex = path[depth].p_ext;
    1716             : 
    1717     6102224 :         if (unlikely(ex == NULL || eh == NULL)) {
    1718           0 :                 EXT4_ERROR_INODE(inode,
    1719             :                                  "ex %p == NULL or eh %p == NULL", ex, eh);
    1720           0 :                 return -EFSCORRUPTED;
    1721             :         }
    1722             : 
    1723     6102224 :         if (depth == 0) {
    1724             :                 /* there is no tree at all */
    1725             :                 return 0;
    1726             :         }
    1727             : 
    1728     5101482 :         if (ex != EXT_FIRST_EXTENT(eh)) {
    1729             :                 /* we correct tree if first leaf got modified only */
    1730             :                 return 0;
    1731             :         }
    1732             : 
    1733             :         /*
    1734             :          * TODO: we need correction if border is smaller than current one
    1735             :          */
    1736       39211 :         k = depth - 1;
    1737       39211 :         border = path[depth].p_ext->ee_block;
    1738       39211 :         err = ext4_ext_get_access(handle, inode, path + k);
    1739       39211 :         if (err)
    1740             :                 return err;
    1741       39211 :         path[k].p_idx->ei_block = border;
    1742       39211 :         err = ext4_ext_dirty(handle, inode, path + k);
    1743       39211 :         if (err)
    1744             :                 return err;
    1745             : 
    1746       39243 :         while (k--) {
    1747             :                 /* change all left-side indexes */
    1748        2006 :                 if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
    1749             :                         break;
    1750          32 :                 err = ext4_ext_get_access(handle, inode, path + k);
    1751          32 :                 if (err)
    1752             :                         break;
    1753          32 :                 path[k].p_idx->ei_block = border;
    1754          32 :                 err = ext4_ext_dirty(handle, inode, path + k);
    1755          32 :                 if (err)
    1756             :                         break;
    1757             :         }
    1758             : 
    1759             :         return err;
    1760             : }
    1761             : 
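/*
 * A compact sketch (toy structures, not kernel code) of the upward key
 * correction ext4_ext_correct_indexes() performs above: when the first entry
 * of a leaf changes, each ancestor index that is itself the first entry of
 * its node gets the new starting block, and the walk stops at the first
 * level where that is no longer true.
 */
#include <assert.h>
#include <stdbool.h>

struct toy_level { unsigned int key; bool is_first_in_node; };

static void toy_correct(struct toy_level *path, int depth, unsigned int border)
{
        int k;

        for (k = depth - 1; k >= 0; k--) {
                path[k].key = border;           /* update this level's key  */
                if (!path[k].is_first_in_node)  /* not first -> stop here   */
                        break;
        }
}

int main(void)
{
        /* path[0] = root level, path[2] = level just above the leaf */
        struct toy_level path[3] = {
                { 100, true }, { 100, false }, { 100, true },
        };

        toy_correct(path, 3, 42);
        assert(path[2].key == 42 && path[1].key == 42 && path[0].key == 100);
        return 0;
}
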
    1762    22011106 : static int ext4_can_extents_be_merged(struct inode *inode,
    1763             :                                       struct ext4_extent *ex1,
    1764             :                                       struct ext4_extent *ex2)
    1765             : {
    1766    22011106 :         unsigned short ext1_ee_len, ext2_ee_len;
    1767             : 
    1768    22011106 :         if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2))
    1769             :                 return 0;
    1770             : 
    1771    18992302 :         ext1_ee_len = ext4_ext_get_actual_len(ex1);
    1772    18992302 :         ext2_ee_len = ext4_ext_get_actual_len(ex2);
    1773             : 
    1774    18992302 :         if (le32_to_cpu(ex1->ee_block) + ext1_ee_len !=
    1775    18992302 :                         le32_to_cpu(ex2->ee_block))
    1776             :                 return 0;
    1777             : 
    1778    16664063 :         if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
    1779             :                 return 0;
    1780             : 
    1781    16587993 :         if (ext4_ext_is_unwritten(ex1) &&
    1782             :             ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)
    1783             :                 return 0;
    1784             : #ifdef AGGRESSIVE_TEST
    1785             :         if (ext1_ee_len >= 4)
    1786             :                 return 0;
    1787             : #endif
    1788             : 
    1789    16580151 :         if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
    1790     2972999 :                 return 1;
    1791             :         return 0;
    1792             : }
    1793             : 
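/*
 * A self-contained sketch (illustrative names only, not kernel code) of the
 * contiguity test ext4_can_extents_be_merged() performs above: two extents
 * can merge only if the second starts exactly where the first ends, both
 * logically and physically, and the combined length stays within a cap
 * (a stand-in here for the EXT_INIT_MAX_LEN-style limit).
 */
#include <assert.h>
#include <stdbool.h>

#define TOY_MAX_LEN 32768U

struct toy_extent { unsigned int lblk; unsigned long long pblk; unsigned int len; };

static bool toy_can_merge(const struct toy_extent *a, const struct toy_extent *b)
{
        if (a->lblk + a->len != b->lblk)
                return false;                   /* not logically adjacent   */
        if (a->pblk + a->len != b->pblk)
                return false;                   /* not physically adjacent  */
        return a->len + b->len <= TOY_MAX_LEN;  /* merged length must fit   */
}

int main(void)
{
        struct toy_extent a = { 0, 1000, 8 }, b = { 8, 1008, 8 }, c = { 8, 2000, 8 };

        assert(toy_can_merge(&a, &b));          /* contiguous both ways     */
        assert(!toy_can_merge(&a, &c));         /* physically discontiguous */
        return 0;
}
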
    1794             : /*
    1795             :  * This function tries to merge the "ex" extent to the next extent in the tree.
    1796             :  * It always tries to merge towards the right. If you want to merge towards
    1797             :  * the left, pass "ex - 1" as the argument instead of "ex".
    1798             :  * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
    1799             :  * 1 if they got merged.
    1800             :  */
    1801    18669368 : static int ext4_ext_try_to_merge_right(struct inode *inode,
    1802             :                                  struct ext4_ext_path *path,
    1803             :                                  struct ext4_extent *ex)
    1804             : {
    1805    18669368 :         struct ext4_extent_header *eh;
    1806    18669368 :         unsigned int depth, len;
    1807    18669368 :         int merge_done = 0, unwritten;
    1808             : 
    1809    18669368 :         depth = ext_depth(inode);
    1810    18669368 :         BUG_ON(path[depth].p_hdr == NULL);
    1811             :         eh = path[depth].p_hdr;
    1812             : 
    1813    21344950 :         while (ex < EXT_LAST_EXTENT(eh)) {
    1814    19584963 :                 if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
    1815             :                         break;
    1816             :                 /* merge with next extent! */
    1817     2675340 :                 unwritten = ext4_ext_is_unwritten(ex);
    1818     5350680 :                 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
    1819             :                                 + ext4_ext_get_actual_len(ex + 1));
    1820     2675340 :                 if (unwritten)
    1821     2310509 :                         ext4_ext_mark_unwritten(ex);
    1822             : 
    1823     2675340 :                 if (ex + 1 < EXT_LAST_EXTENT(eh)) {
    1824     2641434 :                         len = (EXT_LAST_EXTENT(eh) - ex - 1)
    1825     2641434 :                                 * sizeof(struct ext4_extent);
    1826     5282868 :                         memmove(ex + 1, ex + 2, len);
    1827             :                 }
    1828     2675340 :                 le16_add_cpu(&eh->eh_entries, -1);
    1829     2675340 :                 merge_done = 1;
    1830     2675340 :                 WARN_ON(eh->eh_entries == 0);
    1831     2675582 :                 if (!eh->eh_entries)
    1832           0 :                         EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");
    1833             :         }
    1834             : 
    1835    18669610 :         return merge_done;
    1836             : }
    1837             : 
    1838             : /*
    1839             :  * This function does a very simple check to see if we can collapse
    1840             :  * an extent tree with a single extent tree leaf block into the inode.
    1841             :  */
    1842     5959458 : static void ext4_ext_try_to_merge_up(handle_t *handle,
    1843             :                                      struct inode *inode,
    1844             :                                      struct ext4_ext_path *path)
    1845             : {
    1846     5959458 :         size_t s;
    1847     5959458 :         unsigned max_root = ext4_ext_space_root(inode, 0);
    1848     5959458 :         ext4_fsblk_t blk;
    1849             : 
    1850     5959458 :         if ((path[0].p_depth != 1) ||
    1851     2912795 :             (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
    1852     2746185 :             (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
    1853             :                 return;
    1854             : 
    1855             :         /*
    1856             :          * We need to modify the block allocation bitmap and the block
    1857             :          * group descriptor to release the extent tree block.  If we
    1858             :          * can't get the journal credits, give up.
    1859             :          */
    1860       80040 :         if (ext4_journal_extend(handle, 2,
    1861             :                         ext4_free_metadata_revoke_credits(inode->i_sb, 1)))
    1862             :                 return;
    1863             : 
    1864             :         /*
    1865             :          * Copy the extent data up to the inode
    1866             :          */
    1867       39960 :         blk = ext4_idx_pblock(path[0].p_idx);
    1868       39960 :         s = le16_to_cpu(path[1].p_hdr->eh_entries) *
    1869             :                 sizeof(struct ext4_extent_idx);
    1870       39960 :         s += sizeof(struct ext4_extent_header);
    1871             : 
    1872       39960 :         path[1].p_maxdepth = path[0].p_maxdepth;
    1873       79920 :         memcpy(path[0].p_hdr, path[1].p_hdr, s);
    1874       39960 :         path[0].p_depth = 0;
    1875       39960 :         path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
    1876       39960 :                 (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
    1877       39960 :         path[0].p_hdr->eh_max = cpu_to_le16(max_root);
    1878             : 
    1879       39960 :         brelse(path[1].p_bh);
    1880       39960 :         ext4_free_blocks(handle, inode, NULL, blk, 1,
    1881             :                          EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
    1882             : }
    1883             : 
    1884             : /*
    1885             :  * This function tries to merge the @ex extent to neighbours in the tree, then
    1886             :  * tries to collapse the extent tree into the inode.
    1887             :  */
    1888     5959781 : static void ext4_ext_try_to_merge(handle_t *handle,
    1889             :                                   struct inode *inode,
    1890             :                                   struct ext4_ext_path *path,
    1891             :                                   struct ext4_extent *ex)
    1892             : {
    1893     5959781 :         struct ext4_extent_header *eh;
    1894     5959781 :         unsigned int depth;
    1895     5959781 :         int merge_done = 0;
    1896             : 
    1897     5959781 :         depth = ext_depth(inode);
    1898     5959781 :         BUG_ON(path[depth].p_hdr == NULL);
    1899     5959781 :         eh = path[depth].p_hdr;
    1900             : 
    1901     5959781 :         if (ex > EXT_FIRST_EXTENT(eh))
    1902     5144482 :                 merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
    1903             : 
    1904     5144480 :         if (!merge_done)
    1905     3400052 :                 (void) ext4_ext_try_to_merge_right(inode, path, ex);
    1906             : 
    1907     5959512 :         ext4_ext_try_to_merge_up(handle, inode, path);
    1908     5959517 : }
    1909             : 
    1910             : /*
    1911             :  * check if a portion of the "newext" extent overlaps with an
    1912             :  * existing extent.
    1913             :  *
    1914             :  * If an overlap is discovered, it trims the length of newext so that
    1915             :  * there is no overlap, and then returns 1.
    1916             :  * If there is no overlap found, it returns 0.
    1917             :  */
    1918     3814226 : static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
    1919             :                                            struct inode *inode,
    1920             :                                            struct ext4_extent *newext,
    1921             :                                            struct ext4_ext_path *path)
    1922             : {
    1923     3814226 :         ext4_lblk_t b1, b2;
    1924     3814226 :         unsigned int depth, len1;
    1925     3814226 :         unsigned int ret = 0;
    1926             : 
    1927     3814226 :         b1 = le32_to_cpu(newext->ee_block);
    1928     3814226 :         len1 = ext4_ext_get_actual_len(newext);
    1929     3814226 :         depth = ext_depth(inode);
    1930     3814226 :         if (!path[depth].p_ext)
    1931      780932 :                 goto out;
    1932     3033294 :         b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block));
    1933             : 
    1934             :         /*
    1935             :          * get the next allocated block if the extent in the path
    1936             :          * is before the requested block(s)
    1937             :          */
    1938     3033294 :         if (b2 < b1) {
    1939     2977584 :                 b2 = ext4_ext_next_allocated_block(path);
    1940     2977323 :                 if (b2 == EXT_MAX_BLOCKS)
    1941      958576 :                         goto out;
    1942     2018747 :                 b2 = EXT4_LBLK_CMASK(sbi, b2);
    1943             :         }
    1944             : 
    1945             :         /* check for wrap through zero on extent logical start block*/
    1946             :         /* check for wrap through zero on extent logical start block */
    1947           0 :                 len1 = EXT_MAX_BLOCKS - b1;
    1948           0 :                 newext->ee_len = cpu_to_le16(len1);
    1949           0 :                 ret = 1;
    1950             :         }
    1951             : 
    1952             :         /* check for overlap */
    1953     2074457 :         if (b1 + len1 > b2) {
    1954           0 :                 newext->ee_len = cpu_to_le16(b2 - b1);
    1955           0 :                 ret = 1;
    1956             :         }
    1957     2074457 : out:
    1958     3813965 :         return ret;
    1959             : }
    1960             : 
    1961             : /*
    1962             :  * ext4_ext_insert_extent:
    1963             :  * tries to merge requested extent into the existing extent or
    1964             :  * inserts requested extent as new one into the tree,
    1965             :  * creating new leaf in the no-space case.
    1966             :  */
    1967     6080499 : int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
    1968             :                                 struct ext4_ext_path **ppath,
    1969             :                                 struct ext4_extent *newext, int gb_flags)
    1970             : {
    1971     6080499 :         struct ext4_ext_path *path = *ppath;
    1972     6080499 :         struct ext4_extent_header *eh;
    1973     6080499 :         struct ext4_extent *ex, *fex;
    1974     6080499 :         struct ext4_extent *nearex; /* nearest extent */
    1975     6080499 :         struct ext4_ext_path *npath = NULL;
    1976     6080499 :         int depth, len, err;
    1977     6080499 :         ext4_lblk_t next;
    1978     6080499 :         int mb_flags = 0, unwritten;
    1979             : 
    1980     6080499 :         if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
    1981      901210 :                 mb_flags |= EXT4_MB_DELALLOC_RESERVED;
    1982    12160998 :         if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
    1983           0 :                 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
    1984           0 :                 return -EFSCORRUPTED;
    1985             :         }
    1986     6080499 :         depth = ext_depth(inode);
    1987     6080499 :         ex = path[depth].p_ext;
    1988     6080499 :         eh = path[depth].p_hdr;
    1989     6080499 :         if (unlikely(path[depth].p_hdr == NULL)) {
    1990           0 :                 EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
    1991           0 :                 return -EFSCORRUPTED;
    1992             :         }
    1993             : 
    1994             :         /* try to insert block into found extent and return */
    1995     6080499 :         if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) {
    1996             : 
    1997             :                 /*
    1998             :                  * Try to see whether we should rather test the extent on
    1999             :                  * right from ex, or from the left of ex. This is because
    2000             :                  * ext4_find_extent() can return either extent on the
    2001             :                  * left, or on the right from the searched position. This
    2002             :                  * will make merging more effective.
    2003             :                  */
    2004     1428579 :                 if (ex < EXT_LAST_EXTENT(eh) &&
    2005      705706 :                     (le32_to_cpu(ex->ee_block) +
    2006      705706 :                     ext4_ext_get_actual_len(ex) <
    2007      705706 :                     le32_to_cpu(newext->ee_block))) {
    2008      134419 :                         ex += 1;
    2009      134419 :                         goto prepend;
    2010     1294160 :                 } else if ((ex > EXT_FIRST_EXTENT(eh)) &&
    2011     1220714 :                            (le32_to_cpu(newext->ee_block) +
    2012     1220714 :                            ext4_ext_get_actual_len(newext) <
    2013     1220714 :                            le32_to_cpu(ex->ee_block)))
    2014           0 :                         ex -= 1;
    2015             : 
    2016             :                 /* Try to append newex to the ex */
    2017     1294160 :                 if (ext4_can_extents_be_merged(inode, ex, newext)) {
    2018      296592 :                         ext_debug(inode, "append [%d]%d block to %u:[%d]%d"
    2019             :                                   "(from %llu)\n",
    2020             :                                   ext4_ext_is_unwritten(newext),
    2021             :                                   ext4_ext_get_actual_len(newext),
    2022             :                                   le32_to_cpu(ex->ee_block),
    2023             :                                   ext4_ext_is_unwritten(ex),
    2024             :                                   ext4_ext_get_actual_len(ex),
    2025             :                                   ext4_ext_pblock(ex));
    2026      296592 :                         err = ext4_ext_get_access(handle, inode,
    2027             :                                                   path + depth);
    2028      297264 :                         if (err)
    2029             :                                 return err;
    2030      297264 :                         unwritten = ext4_ext_is_unwritten(ex);
    2031      594528 :                         ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
    2032             :                                         + ext4_ext_get_actual_len(newext));
    2033      297264 :                         if (unwritten)
    2034      159888 :                                 ext4_ext_mark_unwritten(ex);
    2035      297264 :                         nearex = ex;
    2036      297264 :                         goto merge;
    2037             :                 }
    2038             : 
    2039      997568 : prepend:
    2040             :                 /* Try to prepend newex to the ex */
    2041     1131987 :                 if (ext4_can_extents_be_merged(inode, newext, ex)) {
    2042        1008 :                         ext_debug(inode, "prepend %u[%d]%d block to %u:[%d]%d"
    2043             :                                   "(from %llu)\n",
    2044             :                                   le32_to_cpu(newext->ee_block),
    2045             :                                   ext4_ext_is_unwritten(newext),
    2046             :                                   ext4_ext_get_actual_len(newext),
    2047             :                                   le32_to_cpu(ex->ee_block),
    2048             :                                   ext4_ext_is_unwritten(ex),
    2049             :                                   ext4_ext_get_actual_len(ex),
    2050             :                                   ext4_ext_pblock(ex));
    2051        1008 :                         err = ext4_ext_get_access(handle, inode,
    2052             :                                                   path + depth);
    2053        1008 :                         if (err)
    2054             :                                 return err;
    2055             : 
    2056        1008 :                         unwritten = ext4_ext_is_unwritten(ex);
    2057        1008 :                         ex->ee_block = newext->ee_block;
    2058        1008 :                         ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
    2059        2016 :                         ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
    2060             :                                         + ext4_ext_get_actual_len(newext));
    2061        1008 :                         if (unwritten)
    2062        1008 :                                 ext4_ext_mark_unwritten(ex);
    2063        1008 :                         nearex = ex;
    2064        1008 :                         goto merge;
    2065             :                 }
    2066             :         }
    2067             : 
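                      :         /*
                      :          * Merging newext into a neighbouring extent was not possible (or
                      :          * was skipped for EXT4_GET_BLOCKS_PRE_IO), so it has to be
                      :          * inserted as an extent of its own.
                      :          */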
    2068     5782899 :         depth = ext_depth(inode);
    2069     5782899 :         eh = path[depth].p_hdr;
    2070     5782899 :         if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
    2071     5713141 :                 goto has_space;
    2072             : 
    2073             :         /* probably next leaf has space for us? */
    2074       69758 :         fex = EXT_LAST_EXTENT(eh);
    2075       69758 :         next = EXT_MAX_BLOCKS;
    2076       69758 :         if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
    2077       27194 :                 next = ext4_ext_next_leaf_block(path);
    2078       27194 :         if (next != EXT_MAX_BLOCKS) {
    2079        1185 :                 ext_debug(inode, "next leaf block - %u\n", next);
    2080        1185 :                 BUG_ON(npath != NULL);
    2081        1185 :                 npath = ext4_find_extent(inode, next, NULL, gb_flags);
    2082        1185 :                 if (IS_ERR(npath))
    2083           0 :                         return PTR_ERR(npath);
    2084        1185 :                 BUG_ON(npath->p_depth != path->p_depth);
    2085        1185 :                 eh = npath[depth].p_hdr;
    2086        1185 :                 if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
    2087        1182 :                         ext_debug(inode, "next leaf isn't full(%d)\n",
    2088             :                                   le16_to_cpu(eh->eh_entries));
    2089        1182 :                         path = npath;
    2090        1182 :                         goto has_space;
    2091             :                 }
    2092             :                 ext_debug(inode, "next leaf has no free space(%d,%d)\n",
    2093             :                           le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
    2094             :         }
    2095             : 
    2096             :         /*
    2097             :          * There is no free space in the found leaf.
    2098             :          * We're gonna add a new leaf in the tree.
    2099             :          */
    2100       68576 :         if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
    2101       34233 :                 mb_flags |= EXT4_MB_USE_RESERVED;
    2102       68576 :         err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
    2103             :                                        ppath, newext);
    2104       68574 :         if (err)
    2105         295 :                 goto cleanup;
    2106       68279 :         depth = ext_depth(inode);
    2107       68279 :         eh = path[depth].p_hdr;
    2108             : 
    2109     5782602 : has_space:
    2110     5782602 :         nearex = path[depth].p_ext;
    2111             : 
    2112     5782602 :         err = ext4_ext_get_access(handle, inode, path + depth);
    2113     5782668 :         if (err)
    2114           0 :                 goto cleanup;
    2115             : 
    2116     5782668 :         if (!nearex) {
    2117             :                 /* there is no extent in this leaf, create first one */
    2118      729716 :                 ext_debug(inode, "first extent in the leaf: %u:%llu:[%d]%d\n",
    2119             :                                 le32_to_cpu(newext->ee_block),
    2120             :                                 ext4_ext_pblock(newext),
    2121             :                                 ext4_ext_is_unwritten(newext),
    2122             :                                 ext4_ext_get_actual_len(newext));
    2123      729716 :                 nearex = EXT_FIRST_EXTENT(eh);
    2124             :         } else {
    2125     5052952 :                 if (le32_to_cpu(newext->ee_block)
    2126     5052952 :                            > le32_to_cpu(nearex->ee_block)) {
    2127             :                         /* Insert after */
     2128     5004015 :                         ext_debug(inode, "insert %u:%llu:[%d]%d after: "
    2129             :                                         "nearest %p\n",
    2130             :                                         le32_to_cpu(newext->ee_block),
    2131             :                                         ext4_ext_pblock(newext),
    2132             :                                         ext4_ext_is_unwritten(newext),
    2133             :                                         ext4_ext_get_actual_len(newext),
    2134             :                                         nearex);
    2135     5004015 :                         nearex++;
    2136             :                 } else {
    2137             :                         /* Insert before */
    2138       48937 :                         BUG_ON(newext->ee_block == nearex->ee_block);
     2139             :                         ext_debug(inode, "insert %u:%llu:[%d]%d before: "
    2140             :                                         "nearest %p\n",
    2141             :                                         le32_to_cpu(newext->ee_block),
    2142             :                                         ext4_ext_pblock(newext),
    2143             :                                         ext4_ext_is_unwritten(newext),
    2144             :                                         ext4_ext_get_actual_len(newext),
    2145             :                                         nearex);
    2146             :                 }
    2147     5052952 :                 len = EXT_LAST_EXTENT(eh) - nearex + 1;
    2148     5052952 :                 if (len > 0) {
    2149     4183651 :                         ext_debug(inode, "insert %u:%llu:[%d]%d: "
    2150             :                                         "move %d extents from 0x%p to 0x%p\n",
    2151             :                                         le32_to_cpu(newext->ee_block),
    2152             :                                         ext4_ext_pblock(newext),
    2153             :                                         ext4_ext_is_unwritten(newext),
    2154             :                                         ext4_ext_get_actual_len(newext),
    2155             :                                         len, nearex, nearex + 1);
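                      :                         /* shift the following extents right by one slot to make room */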
    2156     8367302 :                         memmove(nearex + 1, nearex,
    2157             :                                 len * sizeof(struct ext4_extent));
    2158             :                 }
    2159             :         }
    2160             : 
    2161     5782668 :         le16_add_cpu(&eh->eh_entries, 1);
    2162     5782668 :         path[depth].p_ext = nearex;
    2163     5782668 :         nearex->ee_block = newext->ee_block;
    2164     5782668 :         ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
    2165     5782668 :         nearex->ee_len = newext->ee_len;
    2166             : 
    2167     6080940 : merge:
    2168             :         /* try to merge extents */
    2169     6080940 :         if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO))
    2170     1853606 :                 ext4_ext_try_to_merge(handle, inode, path, nearex);
    2171             : 
    2172             : 
    2173             :         /* time to correct all indexes above */
    2174     6080679 :         err = ext4_ext_correct_indexes(handle, inode, path);
    2175     6080445 :         if (err)
    2176           0 :                 goto cleanup;
    2177             : 
    2178     6080445 :         err = ext4_ext_dirty(handle, inode, path + path->p_depth);
    2179             : 
    2180     6081800 : cleanup:
    2181     6081800 :         ext4_free_ext_path(npath);
    2182     6081658 :         return err;
    2183             : }
    2184             : 
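                      : /*
                      :  * Fill fiemap results for the range [block, block + num) straight from
                      :  * the extent status cache, without reading the on-disk extent tree.
                      :  */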
    2185           0 : static int ext4_fill_es_cache_info(struct inode *inode,
    2186             :                                    ext4_lblk_t block, ext4_lblk_t num,
    2187             :                                    struct fiemap_extent_info *fieinfo)
    2188             : {
    2189           0 :         ext4_lblk_t next, end = block + num - 1;
    2190           0 :         struct extent_status es;
    2191           0 :         unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
    2192           0 :         unsigned int flags;
    2193           0 :         int err;
    2194             : 
    2195           0 :         while (block <= end) {
    2196           0 :                 next = 0;
    2197           0 :                 flags = 0;
    2198           0 :                 if (!ext4_es_lookup_extent(inode, block, &next, &es))
    2199             :                         break;
    2200           0 :                 if (ext4_es_is_unwritten(&es))
    2201           0 :                         flags |= FIEMAP_EXTENT_UNWRITTEN;
    2202           0 :                 if (ext4_es_is_delayed(&es))
    2203           0 :                         flags |= (FIEMAP_EXTENT_DELALLOC |
    2204             :                                   FIEMAP_EXTENT_UNKNOWN);
    2205           0 :                 if (ext4_es_is_hole(&es))
    2206           0 :                         flags |= EXT4_FIEMAP_EXTENT_HOLE;
    2207           0 :                 if (next == 0)
    2208           0 :                         flags |= FIEMAP_EXTENT_LAST;
    2209           0 :                 if (flags & (FIEMAP_EXTENT_DELALLOC|
    2210             :                              EXT4_FIEMAP_EXTENT_HOLE))
    2211           0 :                         es.es_pblk = 0;
    2212             :                 else
    2213           0 :                         es.es_pblk = ext4_es_pblock(&es);
    2214           0 :                 err = fiemap_fill_next_extent(fieinfo,
    2215           0 :                                 (__u64)es.es_lblk << blksize_bits,
    2216           0 :                                 (__u64)es.es_pblk << blksize_bits,
    2217           0 :                                 (__u64)es.es_len << blksize_bits,
    2218             :                                 flags);
    2219           0 :                 if (next == 0)
    2220             :                         break;
    2221           0 :                 block = next;
    2222           0 :                 if (err < 0)
    2223           0 :                         return err;
    2224           0 :                 if (err == 1)
    2225             :                         return 0;
    2226             :         }
    2227             :         return 0;
    2228             : }
    2229             : 
    2230             : 
    2231             : /*
    2232             :  * ext4_ext_determine_hole - determine hole around given block
    2233             :  * @inode:      inode we lookup in
    2234             :  * @path:       path in extent tree to @lblk
    2235             :  * @lblk:       pointer to logical block around which we want to determine hole
    2236             :  *
    2237             :  * Determine hole length (and start if easily possible) around given logical
     2238             :  * Determine hole length (and start if easily possible) around given logical
     2239             :  * block. We don't try too hard to find the beginning of the hole, but when
     2240             :  * @path already points to the extent before @lblk, we provide it.
    2241             :  * The function returns the length of a hole starting at @lblk. We update @lblk
    2242             :  * to the beginning of the hole if we managed to find it.
    2243             :  */
    2244     3370535 : static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
    2245             :                                            struct ext4_ext_path *path,
    2246             :                                            ext4_lblk_t *lblk)
    2247             : {
    2248     3370535 :         int depth = ext_depth(inode);
    2249     3370535 :         struct ext4_extent *ex;
    2250     3370535 :         ext4_lblk_t len;
    2251             : 
    2252     3370535 :         ex = path[depth].p_ext;
    2253     3370535 :         if (ex == NULL) {
    2254             :                 /* there is no extent yet, so gap is [0;-] */
    2255     1793488 :                 *lblk = 0;
    2256     1793488 :                 len = EXT_MAX_BLOCKS;
    2257     1577047 :         } else if (*lblk < le32_to_cpu(ex->ee_block)) {
    2258       20353 :                 len = le32_to_cpu(ex->ee_block) - *lblk;
    2259     1556694 :         } else if (*lblk >= le32_to_cpu(ex->ee_block)
    2260     1556694 :                         + ext4_ext_get_actual_len(ex)) {
    2261     1556694 :                 ext4_lblk_t next;
    2262             : 
    2263     1556694 :                 *lblk = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
    2264     1556694 :                 next = ext4_ext_next_allocated_block(path);
    2265     1556668 :                 BUG_ON(next == *lblk);
    2266     1556668 :                 len = next - *lblk;
    2267             :         } else {
    2268           0 :                 BUG();
    2269             :         }
    2270     3370509 :         return len;
    2271             : }
    2272             : 
    2273             : /*
    2274             :  * ext4_ext_put_gap_in_cache:
    2275             :  * calculate boundaries of the gap that the requested block fits into
    2276             :  * and cache this gap
    2277             :  */
    2278             : static void
    2279     3367189 : ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,
    2280             :                           ext4_lblk_t hole_len)
    2281             : {
    2282     3367189 :         struct extent_status es;
    2283             : 
    2284     3367189 :         ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
    2285     3367189 :                                   hole_start + hole_len - 1, &es);
    2286     3376581 :         if (es.es_len) {
    2287             :                 /* There's delayed extent containing lblock? */
    2288      968098 :                 if (es.es_lblk <= hole_start)
    2289      313933 :                         return;
    2290      654165 :                 hole_len = min(es.es_lblk - hole_start, hole_len);
    2291             :         }
    2292     3062648 :         ext_debug(inode, " -> %u:%u\n", hole_start, hole_len);
    2293     3062648 :         ext4_es_insert_extent(inode, hole_start, hole_len, ~0,
    2294             :                               EXTENT_STATUS_HOLE);
    2295             : }
    2296             : 
    2297             : /*
    2298             :  * ext4_ext_rm_idx:
    2299             :  * removes index from the index block.
    2300             :  */
    2301       13446 : static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
    2302             :                         struct ext4_ext_path *path, int depth)
    2303             : {
    2304       13446 :         int err;
    2305       13446 :         ext4_fsblk_t leaf;
    2306             : 
    2307             :         /* free index block */
    2308       13446 :         depth--;
    2309       13446 :         path = path + depth;
    2310       13446 :         leaf = ext4_idx_pblock(path->p_idx);
    2311       13446 :         if (unlikely(path->p_hdr->eh_entries == 0)) {
    2312           0 :                 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
    2313           0 :                 return -EFSCORRUPTED;
    2314             :         }
    2315       13446 :         err = ext4_ext_get_access(handle, inode, path);
    2316       13446 :         if (err)
    2317             :                 return err;
    2318             : 
    2319       13446 :         if (path->p_idx != EXT_LAST_INDEX(path->p_hdr)) {
    2320           0 :                 int len = EXT_LAST_INDEX(path->p_hdr) - path->p_idx;
    2321           0 :                 len *= sizeof(struct ext4_extent_idx);
    2322           0 :                 memmove(path->p_idx, path->p_idx + 1, len);
    2323             :         }
    2324             : 
    2325       13446 :         le16_add_cpu(&path->p_hdr->eh_entries, -1);
    2326       13446 :         err = ext4_ext_dirty(handle, inode, path);
    2327       13446 :         if (err)
    2328             :                 return err;
    2329       13446 :         ext_debug(inode, "index is empty, remove it, free block %llu\n", leaf);
    2330       13446 :         trace_ext4_ext_rm_idx(inode, leaf);
    2331             : 
    2332       13446 :         ext4_free_blocks(handle, inode, NULL, leaf, 1,
    2333             :                          EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
    2334             : 
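                      :         /*
                      :          * If the removed index was the first entry in its node, the key
                      :          * recorded in the parent index blocks is now stale; walk upwards
                      :          * and copy the new first block number into each affected parent.
                      :          */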
    2335       13486 :         while (--depth >= 0) {
    2336        2469 :                 if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
    2337             :                         break;
    2338          40 :                 path--;
    2339          40 :                 err = ext4_ext_get_access(handle, inode, path);
    2340          40 :                 if (err)
    2341             :                         break;
    2342          40 :                 path->p_idx->ei_block = (path+1)->p_idx->ei_block;
    2343          40 :                 err = ext4_ext_dirty(handle, inode, path);
    2344          40 :                 if (err)
    2345             :                         break;
    2346             :         }
    2347             :         return err;
    2348             : }
    2349             : 
    2350             : /*
    2351             :  * ext4_ext_calc_credits_for_single_extent:
     2352             :  * This routine returns the maximum number of credits needed to insert
     2353             :  * an extent into the extent tree.
     2354             :  * When passing the actual path, the caller should calculate the credits
     2355             :  * under i_data_sem.
    2356             :  */
    2357           0 : int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
    2358             :                                                 struct ext4_ext_path *path)
    2359             : {
    2360           0 :         if (path) {
    2361           0 :                 int depth = ext_depth(inode);
    2362           0 :                 int ret = 0;
    2363             : 
    2364             :                 /* probably there is space in leaf? */
    2365           0 :                 if (le16_to_cpu(path[depth].p_hdr->eh_entries)
    2366           0 :                                 < le16_to_cpu(path[depth].p_hdr->eh_max)) {
    2367             : 
    2368             :                         /*
     2369             :                          *  There is some space in the leaf, so no
     2370             :                          *  need to account for a leaf block credit.
     2371             :                          *
     2372             :                          *  Bitmaps, block group descriptor blocks
     2373             :                          *  and other metadata blocks still need to be
     2374             :                          *  accounted for.
    2375             :                          */
    2376             :                         /* 1 bitmap, 1 block group descriptor */
    2377           0 :                         ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
    2378           0 :                         return ret;
    2379             :                 }
    2380             :         }
    2381             : 
    2382           0 :         return ext4_chunk_trans_blocks(inode, nrblocks);
    2383             : }
    2384             : 
    2385             : /*
     2386             :  * How many index/leaf blocks need to be changed/allocated to add @extents extents?
     2387             :  *
     2388             :  * If we add a single extent, then in the worst case each tree level's
     2389             :  * index/leaf needs to be changed in case the tree splits.
     2390             :  *
     2391             :  * If more extents are inserted, they could cause the whole tree to split
     2392             :  * more than once, but this is really rare.
    2393             :  */
    2394     8126615 : int ext4_ext_index_trans_blocks(struct inode *inode, int extents)
    2395             : {
    2396     8323918 :         int index;
    2397     8323918 :         int depth;
    2398             : 
    2399             :         /* If we are converting the inline data, only one is needed here. */
    2400     8126615 :         if (ext4_has_inline_data(inode))
    2401             :                 return 1;
    2402             : 
    2403     8323918 :         depth = ext_depth(inode);
    2404             : 
    2405     8126615 :         if (extents <= 1)
    2406     8126607 :                 index = depth * 2;
    2407             :         else
    2408      197311 :                 index = depth * 3;
    2409             : 
    2410             :         return index;
    2411             : }
    2412             : 
    2413     2306563 : static inline int get_default_free_blocks_flags(struct inode *inode)
    2414             : {
    2415     2306563 :         if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
    2416             :             ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE))
    2417             :                 return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
    2418     2111576 :         else if (ext4_should_journal_data(inode))
    2419           0 :                 return EXT4_FREE_BLOCKS_FORGET;
    2420             :         return 0;
    2421             : }
    2422             : 
    2423             : /*
    2424             :  * ext4_rereserve_cluster - increment the reserved cluster count when
    2425             :  *                          freeing a cluster with a pending reservation
    2426             :  *
    2427             :  * @inode - file containing the cluster
    2428             :  * @lblk - logical block in cluster to be reserved
    2429             :  *
    2430             :  * Increments the reserved cluster count and adjusts quota in a bigalloc
    2431             :  * file system when freeing a partial cluster containing at least one
    2432             :  * delayed and unwritten block.  A partial cluster meeting that
    2433             :  * requirement will have a pending reservation.  If so, the
    2434             :  * RERESERVE_CLUSTER flag is used when calling ext4_free_blocks() to
    2435             :  * defer reserved and allocated space accounting to a subsequent call
    2436             :  * to this function.
    2437             :  */
    2438           0 : static void ext4_rereserve_cluster(struct inode *inode, ext4_lblk_t lblk)
    2439             : {
    2440           0 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    2441           0 :         struct ext4_inode_info *ei = EXT4_I(inode);
    2442             : 
    2443           0 :         dquot_reclaim_block(inode, EXT4_C2B(sbi, 1));
    2444             : 
    2445           0 :         spin_lock(&ei->i_block_reservation_lock);
    2446           0 :         ei->i_reserved_data_blocks++;
    2447           0 :         percpu_counter_add(&sbi->s_dirtyclusters_counter, 1);
    2448           0 :         spin_unlock(&ei->i_block_reservation_lock);
    2449             : 
    2450           0 :         percpu_counter_add(&sbi->s_freeclusters_counter, 1);
    2451           0 :         ext4_remove_pending(inode, lblk);
    2452           0 : }
    2453             : 
    2454     2306570 : static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
    2455             :                               struct ext4_extent *ex,
    2456             :                               struct partial_cluster *partial,
    2457             :                               ext4_lblk_t from, ext4_lblk_t to)
    2458             : {
    2459     2306570 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    2460     2306570 :         unsigned short ee_len = ext4_ext_get_actual_len(ex);
    2461     2306570 :         ext4_fsblk_t last_pblk, pblk;
    2462     2306570 :         ext4_lblk_t num;
    2463     2306570 :         int flags;
    2464             : 
    2465             :         /* only extent tail removal is allowed */
    2466     2306570 :         if (from < le32_to_cpu(ex->ee_block) ||
    2467     2306570 :             to != le32_to_cpu(ex->ee_block) + ee_len - 1) {
    2468           0 :                 ext4_error(sbi->s_sb,
    2469             :                            "strange request: removal(2) %u-%u from %u:%u",
    2470             :                            from, to, le32_to_cpu(ex->ee_block), ee_len);
    2471           0 :                 return 0;
    2472             :         }
    2473             : 
    2474             : #ifdef EXTENTS_STATS
    2475             :         spin_lock(&sbi->s_ext_stats_lock);
    2476             :         sbi->s_ext_blocks += ee_len;
    2477             :         sbi->s_ext_extents++;
    2478             :         if (ee_len < sbi->s_ext_min)
    2479             :                 sbi->s_ext_min = ee_len;
    2480             :         if (ee_len > sbi->s_ext_max)
    2481             :                 sbi->s_ext_max = ee_len;
    2482             :         if (ext_depth(inode) > sbi->s_depth_max)
    2483             :                 sbi->s_depth_max = ext_depth(inode);
    2484             :         spin_unlock(&sbi->s_ext_stats_lock);
    2485             : #endif
    2486             : 
    2487     2306580 :         trace_ext4_remove_blocks(inode, ex, from, to, partial);
    2488             : 
    2489             :         /*
    2490             :          * if we have a partial cluster, and it's different from the
    2491             :          * cluster of the last block in the extent, we free it
    2492             :          */
    2493     2306575 :         last_pblk = ext4_ext_pblock(ex) + ee_len - 1;
    2494             : 
    2495     2306575 :         if (partial->state != initial &&
    2496           0 :             partial->pclu != EXT4_B2C(sbi, last_pblk)) {
    2497           0 :                 if (partial->state == tofree) {
    2498           0 :                         flags = get_default_free_blocks_flags(inode);
    2499           0 :                         if (ext4_is_pending(inode, partial->lblk))
    2500           0 :                                 flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
    2501           0 :                         ext4_free_blocks(handle, inode, NULL,
    2502           0 :                                          EXT4_C2B(sbi, partial->pclu),
    2503           0 :                                          sbi->s_cluster_ratio, flags);
    2504           0 :                         if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
    2505           0 :                                 ext4_rereserve_cluster(inode, partial->lblk);
    2506             :                 }
    2507           0 :                 partial->state = initial;
    2508             :         }
    2509             : 
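                      :         /*
                      :          * num is the number of blocks being removed from the tail of the
                      :          * extent, and pblk is the first physical block of that tail.
                      :          */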
    2510     2306575 :         num = le32_to_cpu(ex->ee_block) + ee_len - from;
    2511     2306575 :         pblk = ext4_ext_pblock(ex) + ee_len - num;
    2512             : 
    2513             :         /*
    2514             :          * We free the partial cluster at the end of the extent (if any),
    2515             :          * unless the cluster is used by another extent (partial_cluster
    2516             :          * state is nofree).  If a partial cluster exists here, it must be
    2517             :          * shared with the last block in the extent.
    2518             :          */
    2519     2306575 :         flags = get_default_free_blocks_flags(inode);
    2520             : 
    2521             :         /* partial, left end cluster aligned, right end unaligned */
    2522     2306562 :         if ((EXT4_LBLK_COFF(sbi, to) != sbi->s_cluster_ratio - 1) &&
    2523           0 :             (EXT4_LBLK_CMASK(sbi, to) >= from) &&
    2524           0 :             (partial->state != nofree)) {
    2525           0 :                 if (ext4_is_pending(inode, to))
    2526           0 :                         flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
    2527           0 :                 ext4_free_blocks(handle, inode, NULL,
    2528           0 :                                  EXT4_PBLK_CMASK(sbi, last_pblk),
    2529           0 :                                  sbi->s_cluster_ratio, flags);
    2530           0 :                 if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
    2531           0 :                         ext4_rereserve_cluster(inode, to);
    2532           0 :                 partial->state = initial;
    2533           0 :                 flags = get_default_free_blocks_flags(inode);
    2534             :         }
    2535             : 
    2536     2306562 :         flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
    2537             : 
    2538             :         /*
    2539             :          * For bigalloc file systems, we never free a partial cluster
    2540             :          * at the beginning of the extent.  Instead, we check to see if we
    2541             :          * need to free it on a subsequent call to ext4_remove_blocks,
    2542             :          * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space.
    2543             :          */
    2544     2306562 :         flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
    2545     2306562 :         ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
    2546             : 
    2547             :         /* reset the partial cluster if we've freed past it */
    2548     2306612 :         if (partial->state != initial && partial->pclu != EXT4_B2C(sbi, pblk))
    2549           0 :                 partial->state = initial;
    2550             : 
    2551             :         /*
    2552             :          * If we've freed the entire extent but the beginning is not left
    2553             :          * cluster aligned and is not marked as ineligible for freeing we
    2554             :          * record the partial cluster at the beginning of the extent.  It
    2555             :          * wasn't freed by the preceding ext4_free_blocks() call, and we
    2556             :          * need to look farther to the left to determine if it's to be freed
    2557             :          * (not shared with another extent). Else, reset the partial
    2558             :          * cluster - we're either  done freeing or the beginning of the
    2559             :          * extent is left cluster aligned.
    2560             :          */
    2561     2306612 :         if (EXT4_LBLK_COFF(sbi, from) && num == ee_len) {
    2562           0 :                 if (partial->state == initial) {
    2563           0 :                         partial->pclu = EXT4_B2C(sbi, pblk);
    2564           0 :                         partial->lblk = from;
    2565           0 :                         partial->state = tofree;
    2566             :                 }
    2567             :         } else {
    2568     2306612 :                 partial->state = initial;
    2569             :         }
    2570             : 
    2571             :         return 0;
    2572             : }
    2573             : 
    2574             : /*
    2575             :  * ext4_ext_rm_leaf() Removes the extents associated with the
    2576             :  * blocks appearing between "start" and "end".  Both "start"
    2577             :  * and "end" must appear in the same extent or EIO is returned.
    2578             :  *
    2579             :  * @handle: The journal handle
    2580             :  * @inode:  The files inode
    2581             :  * @path:   The path to the leaf
    2582             :  * @partial_cluster: The cluster which we'll have to free if all extents
    2583             :  *                   has been released from it.  However, if this value is
    2584             :  *                   negative, it's a cluster just to the right of the
    2585             :  *                   punched region and it must not be freed.
    2586             :  * @start:  The first block to remove
    2587             :  * @end:   The last block to remove
    2588             :  */
    2589             : static int
    2590     1067625 : ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
    2591             :                  struct ext4_ext_path *path,
    2592             :                  struct partial_cluster *partial,
    2593             :                  ext4_lblk_t start, ext4_lblk_t end)
    2594             : {
    2595     1067625 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    2596     1067625 :         int err = 0, correct_index = 0;
    2597     1067625 :         int depth = ext_depth(inode), credits, revoke_credits;
    2598     1067625 :         struct ext4_extent_header *eh;
    2599     1067625 :         ext4_lblk_t a, b;
    2600     1067625 :         unsigned num;
    2601     1067625 :         ext4_lblk_t ex_ee_block;
    2602     1067625 :         unsigned short ex_ee_len;
    2603     1067625 :         unsigned unwritten = 0;
    2604     1067625 :         struct ext4_extent *ex;
    2605     1067625 :         ext4_fsblk_t pblk;
    2606             : 
    2607             :         /* the header must be checked already in ext4_ext_remove_space() */
    2608     1067625 :         ext_debug(inode, "truncate since %u in leaf to %u\n", start, end);
    2609     1067625 :         if (!path[depth].p_hdr)
    2610      212155 :                 path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
    2611     1067625 :         eh = path[depth].p_hdr;
    2612     1067625 :         if (unlikely(path[depth].p_hdr == NULL)) {
    2613           0 :                 EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
    2614           0 :                 return -EFSCORRUPTED;
    2615             :         }
    2616             :         /* find where to start removing */
    2617     1067625 :         ex = path[depth].p_ext;
    2618     1067625 :         if (!ex)
    2619      642236 :                 ex = EXT_LAST_EXTENT(eh);
    2620             : 
    2621     1067625 :         ex_ee_block = le32_to_cpu(ex->ee_block);
    2622     1067625 :         ex_ee_len = ext4_ext_get_actual_len(ex);
    2623             : 
    2624     1067625 :         trace_ext4_ext_rm_leaf(inode, start, ex, partial);
    2625             : 
    2626     3546315 :         while (ex >= EXT_FIRST_EXTENT(eh) &&
    2627     3098937 :                         ex_ee_block + ex_ee_len > start) {
    2628             : 
    2629     2478758 :                 if (ext4_ext_is_unwritten(ex))
    2630             :                         unwritten = 1;
    2631             :                 else
    2632     1303926 :                         unwritten = 0;
    2633             : 
    2634     2478758 :                 ext_debug(inode, "remove ext %u:[%d]%d\n", ex_ee_block,
    2635             :                           unwritten, ex_ee_len);
    2636     2478758 :                 path[depth].p_ext = ex;
    2637             : 
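                      :                 /* clamp the removal range to the part of this extent inside [start, end] */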
    2638     2478758 :                 a = max(ex_ee_block, start);
    2639     2478758 :                 b = min(ex_ee_block + ex_ee_len - 1, end);
    2640             : 
    2641     2478758 :                 ext_debug(inode, "  border %u:%u\n", a, b);
    2642             : 
    2643             :                 /* If this extent is beyond the end of the hole, skip it */
    2644     2478758 :                 if (end < ex_ee_block) {
    2645             :                         /*
    2646             :                          * We're going to skip this extent and move to another,
    2647             :                          * so note that its first cluster is in use to avoid
    2648             :                          * freeing it when removing blocks.  Eventually, the
    2649             :                          * right edge of the truncated/punched region will
    2650             :                          * be just to the left.
    2651             :                          */
    2652      172059 :                         if (sbi->s_cluster_ratio > 1) {
    2653           0 :                                 pblk = ext4_ext_pblock(ex);
    2654           0 :                                 partial->pclu = EXT4_B2C(sbi, pblk);
    2655           0 :                                 partial->state = nofree;
    2656             :                         }
    2657      172059 :                         ex--;
    2658      172059 :                         ex_ee_block = le32_to_cpu(ex->ee_block);
    2659      172059 :                         ex_ee_len = ext4_ext_get_actual_len(ex);
    2660      172059 :                         continue;
    2661     2306699 :                 } else if (b != ex_ee_block + ex_ee_len - 1) {
    2662           0 :                         EXT4_ERROR_INODE(inode,
    2663             :                                          "can not handle truncate %u:%u "
    2664             :                                          "on extent %u:%u",
    2665             :                                          start, end, ex_ee_block,
    2666             :                                          ex_ee_block + ex_ee_len - 1);
    2667           0 :                         err = -EFSCORRUPTED;
    2668           0 :                         goto out;
    2669     2306699 :                 } else if (a != ex_ee_block) {
    2670             :                         /* remove tail of the extent */
    2671      215717 :                         num = a - ex_ee_block;
    2672             :                 } else {
    2673             :                         /* remove whole extent: excellent! */
    2674             :                         num = 0;
    2675             :                 }
    2676             :                 /*
    2677             :                  * 3 for leaf, sb, and inode plus 2 (bmap and group
    2678             :                  * descriptor) for each block group; assume two block
    2679             :                  * groups plus ex_ee_len/blocks_per_block_group for
    2680             :                  * the worst case
    2681             :                  */
    2682     2306699 :                 credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
    2683     2306699 :                 if (ex == EXT_FIRST_EXTENT(eh)) {
    2684      336079 :                         correct_index = 1;
    2685      336079 :                         credits += (ext_depth(inode)) + 1;
    2686             :                 }
    2687     2306699 :                 credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
    2688             :                 /*
    2689             :                  * We may end up freeing some index blocks and data from the
    2690             :                  * punched range. Note that partial clusters are accounted for
    2691             :                  * by ext4_free_data_revoke_credits().
    2692             :                  */
    2693     2306699 :                 revoke_credits =
    2694             :                         ext4_free_metadata_revoke_credits(inode->i_sb,
    2695             :                                                           ext_depth(inode)) +
    2696     2306699 :                         ext4_free_data_revoke_credits(inode, b - a + 1);
    2697             : 
    2698     2306642 :                 err = ext4_datasem_ensure_credits(handle, inode, credits,
    2699             :                                                   credits, revoke_credits);
    2700     2306633 :                 if (err) {
    2701          70 :                         if (err > 0)
    2702          70 :                                 err = -EAGAIN;
    2703          70 :                         goto out;
    2704             :                 }
    2705             : 
    2706     2306563 :                 err = ext4_ext_get_access(handle, inode, path + depth);
    2707     2306569 :                 if (err)
    2708           0 :                         goto out;
    2709             : 
    2710     2306569 :                 err = ext4_remove_blocks(handle, inode, ex, partial, a, b);
    2711     2306597 :                 if (err)
    2712           0 :                         goto out;
    2713             : 
    2714     2306597 :                 if (num == 0)
    2715             :                         /* this extent is removed; mark slot entirely unused */
    2716     2090891 :                         ext4_ext_store_pblock(ex, 0);
    2717             : 
    2718     2306597 :                 ex->ee_len = cpu_to_le16(num);
    2719             :                 /*
    2720             :                  * Do not mark unwritten if all the blocks in the
    2721             :                  * extent have been removed.
    2722             :                  */
    2723     2306597 :                 if (unwritten && num)
    2724       78144 :                         ext4_ext_mark_unwritten(ex);
    2725             :                 /*
    2726             :                  * If the extent was completely released,
    2727             :                  * we need to remove it from the leaf
    2728             :                  */
    2729     2306597 :                 if (num == 0) {
    2730     2090890 :                         if (end != EXT_MAX_BLOCKS - 1) {
    2731             :                                 /*
    2732             :                                  * For hole punching, we need to scoot all the
    2733             :                                  * extents up when an extent is removed so that
     2734             :                          * we don't have blank extents in the middle
    2735             :                                  */
    2736      787768 :                                 memmove(ex, ex+1, (EXT_LAST_EXTENT(eh) - ex) *
    2737             :                                         sizeof(struct ext4_extent));
    2738             : 
    2739             :                                 /* Now get rid of the one at the end */
    2740      787768 :                                 memset(EXT_LAST_EXTENT(eh), 0,
    2741             :                                         sizeof(struct ext4_extent));
    2742             :                         }
    2743     2090890 :                         le16_add_cpu(&eh->eh_entries, -1);
    2744             :                 }
    2745             : 
    2746     2306597 :                 err = ext4_ext_dirty(handle, inode, path + depth);
    2747     2306631 :                 if (err)
    2748           0 :                         goto out;
    2749             : 
    2750     2306631 :                 ext_debug(inode, "new extent: %u:%u:%llu\n", ex_ee_block, num,
    2751             :                                 ext4_ext_pblock(ex));
    2752     2306631 :                 ex--;
    2753     2306631 :                 ex_ee_block = le32_to_cpu(ex->ee_block);
    2754     4613262 :                 ex_ee_len = ext4_ext_get_actual_len(ex);
    2755             :         }
    2756             : 
    2757     1067493 :         if (correct_index && eh->eh_entries)
    2758       21731 :                 err = ext4_ext_correct_indexes(handle, inode, path);
    2759             : 
    2760             :         /*
    2761             :          * If there's a partial cluster and at least one extent remains in
    2762             :          * the leaf, free the partial cluster if it isn't shared with the
    2763             :          * current extent.  If it is shared with the current extent
    2764             :          * we reset the partial cluster because we've reached the start of the
    2765             :          * truncated/punched region and we're done removing blocks.
    2766             :          */
    2767     1067493 :         if (partial->state == tofree && ex >= EXT_FIRST_EXTENT(eh)) {
    2768           0 :                 pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
    2769           0 :                 if (partial->pclu != EXT4_B2C(sbi, pblk)) {
    2770           0 :                         int flags = get_default_free_blocks_flags(inode);
    2771             : 
    2772           0 :                         if (ext4_is_pending(inode, partial->lblk))
    2773           0 :                                 flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
    2774           0 :                         ext4_free_blocks(handle, inode, NULL,
    2775           0 :                                          EXT4_C2B(sbi, partial->pclu),
    2776           0 :                                          sbi->s_cluster_ratio, flags);
    2777           0 :                         if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
    2778           0 :                                 ext4_rereserve_cluster(inode, partial->lblk);
    2779             :                 }
    2780           0 :                 partial->state = initial;
    2781             :         }
    2782             : 
    2783             :         /* if this leaf is free, then we should
    2784             :          * remove it from index block above */
    2785     1067493 :         if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
    2786       13406 :                 err = ext4_ext_rm_idx(handle, inode, path, depth);
    2787             : 
    2788     1054087 : out:
    2789             :         return err;
    2790             : }
    2791             : 
    2792             : /*
    2793             :  * ext4_ext_more_to_rm:
    2794             :  * returns 1 if current index has to be freed (even partial)
    2795             :  */
    2796             : static int
    2797      795268 : ext4_ext_more_to_rm(struct ext4_ext_path *path)
    2798             : {
    2799      795268 :         BUG_ON(path->p_idx == NULL);
    2800             : 
    2801      795268 :         if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
    2802             :                 return 0;
    2803             : 
    2804             :         /*
    2805             :          * if truncate on deeper level happened, it wasn't partial,
    2806             :          * so we have to consider current index for truncation
    2807             :          */
    2808      254693 :         if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block)
    2809       42254 :                 return 0;
    2810             :         return 1;
    2811             : }
    2812             : 
    2813     1030563 : int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
    2814             :                           ext4_lblk_t end)
    2815             : {
    2816     1030563 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    2817     1030563 :         int depth = ext_depth(inode);
    2818     1030563 :         struct ext4_ext_path *path = NULL;
    2819     1030563 :         struct partial_cluster partial;
    2820     1030563 :         handle_t *handle;
    2821     1030563 :         int i = 0, err = 0;
    2822             : 
    2823     1030563 :         partial.pclu = 0;
    2824     1030563 :         partial.lblk = 0;
    2825     1030563 :         partial.state = initial;
    2826             : 
    2827     1030563 :         ext_debug(inode, "truncate since %u to %u\n", start, end);
    2828             : 
    2829             :         /* probably first extent we're gonna free will be last in block */
    2830     1030563 :         handle = ext4_journal_start_with_revoke(inode, EXT4_HT_TRUNCATE,
    2831             :                         depth + 1,
    2832             :                         ext4_free_metadata_revoke_credits(inode->i_sb, depth));
    2833     1030555 :         if (IS_ERR(handle))
    2834           0 :                 return PTR_ERR(handle);
    2835             : 
    2836     1030555 : again:
    2837     1030625 :         trace_ext4_ext_remove_space(inode, start, end, depth);
    2838             : 
    2839             :         /*
    2840             :          * Check if we are removing extents inside the extent tree. If that
    2841             :          * is the case, we are going to punch a hole inside the extent tree,
    2842             :          * so we have to check whether we need to split the extent covering
    2843             :          * the last block to remove, so that we can easily remove part of it
    2844             :          * in ext4_ext_rm_leaf().
    2845             :          */
    2846     1030600 :         if (end < EXT_MAX_BLOCKS - 1) {
    2847      435669 :                 struct ext4_extent *ex;
    2848      435669 :                 ext4_lblk_t ee_block, ex_end, lblk;
    2849      435669 :                 ext4_fsblk_t pblk;
    2850             : 
    2851             :                 /* find extent for or closest extent to this block */
    2852      435669 :                 path = ext4_find_extent(inode, end, NULL,
    2853             :                                         EXT4_EX_NOCACHE | EXT4_EX_NOFAIL);
    2854      435665 :                 if (IS_ERR(path)) {
    2855           0 :                         ext4_journal_stop(handle);
    2856           0 :                         return PTR_ERR(path);
    2857             :                 }
    2858      435665 :                 depth = ext_depth(inode);
    2859             :                 /* The leaf may be missing only if the inode has no blocks at all */
    2860      435665 :                 ex = path[depth].p_ext;
    2861      435665 :                 if (!ex) {
    2862       10297 :                         if (depth) {
    2863           0 :                                 EXT4_ERROR_INODE(inode,
    2864             :                                                  "path[%d].p_hdr == NULL",
    2865             :                                                  depth);
    2866           0 :                                 err = -EFSCORRUPTED;
    2867             :                         }
    2868       10297 :                         goto out;
    2869             :                 }
    2870             : 
    2871      425368 :                 ee_block = le32_to_cpu(ex->ee_block);
    2872      425368 :                 ex_end = ee_block + ext4_ext_get_actual_len(ex) - 1;
    2873             : 
    2874             :                 /*
    2875             :                  * See if the last block is inside the extent, if so split
    2876             :                  * the extent at 'end' block so we can easily remove the
    2877             :                  * tail of the first part of the split extent in
    2878             :                  * ext4_ext_rm_leaf().
    2879             :                  */
    2880      425368 :                 if (end >= ee_block && end < ex_end) {
    2881             : 
    2882             :                         /*
    2883             :                          * If we're going to split the extent, note that
    2884             :                          * the cluster containing the block after 'end' is
    2885             :                          * in use to avoid freeing it when removing blocks.
    2886             :                          */
    2887      165404 :                         if (sbi->s_cluster_ratio > 1) {
    2888           0 :                                 pblk = ext4_ext_pblock(ex) + end - ee_block + 1;
    2889           0 :                                 partial.pclu = EXT4_B2C(sbi, pblk);
    2890           0 :                                 partial.state = nofree;
    2891             :                         }
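                     :                         /*
                     :                          * Editor's note (illustrative, assuming the usual
                     :                          * EXT4_B2C() mapping of pblk >> s_cluster_bits): on a
                     :                          * bigalloc fs with s_cluster_ratio == 16, if the block
                     :                          * after 'end' is pblk == 1000, partial.pclu becomes
                     :                          * 1000 >> 4 == 62 and that cluster is marked nofree,
                     :                          * so the leaf removal below will not release it.
                     :                          */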
    2892             : 
    2893             :                         /*
    2894             :                          * Split the extent in two so that 'end' is the last
    2895             :                          * block in the first new extent. Also, we should not
    2896             :                          * fail to remove space due to ENOSPC, so try to use a
    2897             :                          * reserved block if that happens.
    2898             :                          */
    2899      165404 :                         err = ext4_force_split_extent_at(handle, inode, &path,
    2900             :                                                          end + 1, 1);
    2901      165404 :                         if (err < 0)
    2902           0 :                                 goto out;
    2903             : 
    2904      259964 :                 } else if (sbi->s_cluster_ratio > 1 && end >= ex_end &&
    2905           0 :                            partial.state == initial) {
    2906             :                         /*
    2907             :                          * If we're punching, there's an extent to the right.
    2908             :                          * If the partial cluster hasn't been set, set it to
    2909             :                          * that extent's first cluster and its state to nofree
    2910             :                          * so it won't be freed should it contain blocks to be
    2911             :                          * removed. If it's already set (tofree/nofree), we're
    2912             :                          * retrying and keep the original partial cluster info
    2913             :                          * so a cluster marked tofree as a result of earlier
    2914             :                          * extent removal is not lost.
    2915             :                          */
    2916           0 :                         lblk = ex_end + 1;
    2917           0 :                         err = ext4_ext_search_right(inode, path, &lblk, &pblk,
    2918             :                                                     NULL);
    2919           0 :                         if (err < 0)
    2920           0 :                                 goto out;
    2921           0 :                         if (pblk) {
    2922           0 :                                 partial.pclu = EXT4_B2C(sbi, pblk);
    2923           0 :                                 partial.state = nofree;
    2924             :                         }
    2925             :                 }
    2926             :         }
    2927             :         /*
    2928             :          * We start scanning from the right side, freeing all the blocks
    2929             :          * after i_size and walking into the tree depth-wise.
    2930             :          */
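                     :         /*
                     :          * Editor's sketch of the loop below (illustrative only): path[0..depth]
                     :          * acts as an explicit stack and 'i' is the current level.  For a
                     :          * depth-1 tree the loop descends from the root index to the rightmost
                     :          * leaf, calls ext4_ext_rm_leaf() there, pops back to the root, moves
                     :          * p_idx one entry to the left and descends again, until
                     :          * ext4_ext_more_to_rm() reports that nothing is left at the root.
                     :          */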
    2931     1020299 :         depth = ext_depth(inode);
    2932     1020299 :         if (path) {
    2933             :                 int k = i = depth;
    2934      468370 :                 while (--k > 0)
    2935       42990 :                         path[k].p_block =
    2936       42990 :                                 le16_to_cpu(path[k].p_hdr->eh_entries)+1;
    2937             :         } else {
    2938      594919 :                 path = kcalloc(depth + 1, sizeof(struct ext4_ext_path),
    2939             :                                GFP_NOFS | __GFP_NOFAIL);
    2940      594912 :                 if (path == NULL) {
    2941           0 :                         ext4_journal_stop(handle);
    2942           0 :                         return -ENOMEM;
    2943             :                 }
    2944      594912 :                 path[0].p_maxdepth = path[0].p_depth = depth;
    2945      594912 :                 path[0].p_hdr = ext_inode_hdr(inode);
    2946      594912 :                 i = 0;
    2947             : 
    2948      594912 :                 if (ext4_ext_check(inode, path[0].p_hdr, depth, 0)) {
    2949           0 :                         err = -EFSCORRUPTED;
    2950           0 :                         goto out;
    2951             :                 }
    2952             :         }
    2953             :         err = 0;
    2954             : 
    2955     2883126 :         while (i >= 0 && err == 0) {
    2956     1862840 :                 if (i == depth) {
    2957             :                         /* this is leaf block */
    2958     1067619 :                         err = ext4_ext_rm_leaf(handle, inode, path,
    2959             :                                                &partial, start, end);
    2960             :                         /* root level has p_bh == NULL, brelse() eats this */
    2961     1067505 :                         brelse(path[i].p_bh);
    2962     1067555 :                         path[i].p_bh = NULL;
    2963     1067555 :                         i--;
    2964     1067555 :                         continue;
    2965             :                 }
    2966             : 
    2967             :                 /* this is index block */
    2968      795221 :                 if (!path[i].p_hdr) {
    2969         290 :                         ext_debug(inode, "initialize header\n");
    2970         290 :                         path[i].p_hdr = ext_block_hdr(path[i].p_bh);
    2971             :                 }
    2972             : 
    2973      795221 :                 if (!path[i].p_idx) {
    2974             :                         /* this level hasn't been touched yet */
    2975      165148 :                         path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
    2976      165148 :                         path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1;
    2977      165148 :                         ext_debug(inode, "init index ptr: hdr 0x%p, num %d\n",
    2978             :                                   path[i].p_hdr,
    2979             :                                   le16_to_cpu(path[i].p_hdr->eh_entries));
    2980             :                 } else {
    2981             :                         /* we were already here, see at next index */
    2982      630073 :                         path[i].p_idx--;
    2983             :                 }
    2984             : 
    2985      795221 :                 ext_debug(inode, "level %d - index, first 0x%p, cur 0x%p\n",
    2986             :                                 i, EXT_FIRST_INDEX(path[i].p_hdr),
    2987             :                                 path[i].p_idx);
    2988      795221 :                 if (ext4_ext_more_to_rm(path + i)) {
    2989      212441 :                         struct buffer_head *bh;
    2990             :                         /* go to the next level */
    2991      212441 :                         ext_debug(inode, "move to level %d (block %llu)\n",
    2992             :                                   i + 1, ext4_idx_pblock(path[i].p_idx));
    2993      212441 :                         memset(path + i + 1, 0, sizeof(*path));
    2994      212441 :                         bh = read_extent_tree_block(inode, path[i].p_idx,
    2995             :                                                     depth - i - 1,
    2996             :                                                     EXT4_EX_NOCACHE);
    2997      212449 :                         if (IS_ERR(bh)) {
    2998             :                                 /* should we reset i_size? */
    2999           1 :                                 err = PTR_ERR(bh);
    3000           1 :                                 break;
    3001             :                         }
    3002             :                         /* Yield here to deal with large extent trees.
    3003             :                          * Should be a no-op if we did IO above. */
    3004      212448 :                         cond_resched();
    3005      212445 :                         if (WARN_ON(i + 1 > depth)) {
    3006             :                                 err = -EFSCORRUPTED;
    3007             :                                 break;
    3008             :                         }
    3009      212445 :                         path[i + 1].p_bh = bh;
    3010             : 
    3011             :                         /* save actual number of indexes since this
    3012             :                          * number is changed at the next iteration */
    3013      212445 :                         path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
    3014      212445 :                         i++;
    3015             :                 } else {
    3016             :                         /* we finished processing this index, go up */
    3017      582827 :                         if (path[i].p_hdr->eh_entries == 0 && i > 0) {
    3018             :                                 /* index is empty, remove it;
    3019             :                                  * the handle must already be prepared by the
    3020             :                                  * earlier leaf removal (ext4_ext_rm_leaf()) */
    3021          40 :                                 err = ext4_ext_rm_idx(handle, inode, path, i);
    3022             :                         }
    3023             :                         /* root level has p_bh == NULL, brelse() eats this */
    3024      582827 :                         brelse(path[i].p_bh);
    3025      582827 :                         path[i].p_bh = NULL;
    3026      582827 :                         i--;
    3027      582827 :                         ext_debug(inode, "return to level %d\n", i);
    3028             :                 }
    3029             :         }
    3030             : 
    3031     1020287 :         trace_ext4_ext_remove_space_done(inode, start, end, depth, &partial,
    3032     1020287 :                                          path->p_hdr->eh_entries);
    3033             : 
    3034             :         /*
    3035             :          * if there's a partial cluster and we have removed the first extent
    3036             :          * in the file, then we also free the partial cluster, if any
    3037             :          */
    3038     1020166 :         if (partial.state == tofree && err == 0) {
    3039           0 :                 int flags = get_default_free_blocks_flags(inode);
    3040             : 
    3041           0 :                 if (ext4_is_pending(inode, partial.lblk))
    3042           0 :                         flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
    3043           0 :                 ext4_free_blocks(handle, inode, NULL,
    3044           0 :                                  EXT4_C2B(sbi, partial.pclu),
    3045           0 :                                  sbi->s_cluster_ratio, flags);
    3046           0 :                 if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
    3047           0 :                         ext4_rereserve_cluster(inode, partial.lblk);
    3048           0 :                 partial.state = initial;
    3049             :         }
    3050             : 
    3051             :         /* TODO: flexible tree reduction should be here */
    3052     1020166 :         if (path->p_hdr->eh_entries == 0) {
    3053             :                 /*
    3054             :                  * truncate to zero freed all the tree,
    3055             :                  * so we need to correct eh_depth
    3056             :                  */
    3057      416310 :                 err = ext4_ext_get_access(handle, inode, path);
    3058      416292 :                 if (err == 0) {
    3059      416292 :                         ext_inode_hdr(inode)->eh_depth = 0;
    3060      416292 :                         ext_inode_hdr(inode)->eh_max =
    3061             :                                 cpu_to_le16(ext4_ext_space_root(inode, 0));
    3062      416292 :                         err = ext4_ext_dirty(handle, inode, path);
    3063             :                 }
    3064             :         }
    3065      603856 : out:
    3066     1030781 :         ext4_free_ext_path(path);
    3067     1030581 :         path = NULL;
    3068     1030581 :         if (err == -EAGAIN)
    3069          70 :                 goto again;
    3070     1030511 :         ext4_journal_stop(handle);
    3071             : 
    3072     1030511 :         return err;
    3073             : }
    3074             : 
    3075             : /*
    3076             :  * called at mount time
    3077             :  */
    3078        2513 : void ext4_ext_init(struct super_block *sb)
    3079             : {
    3080             :         /*
    3081             :          * possible initialization would be here
    3082             :          */
    3083             : 
    3084        2513 :         if (ext4_has_feature_extents(sb)) {
    3085             : #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
    3086             :                 printk(KERN_INFO "EXT4-fs: file extents enabled"
    3087             : #ifdef AGGRESSIVE_TEST
    3088             :                        ", aggressive tests"
    3089             : #endif
    3090             : #ifdef CHECK_BINSEARCH
    3091             :                        ", check binsearch"
    3092             : #endif
    3093             : #ifdef EXTENTS_STATS
    3094             :                        ", stats"
    3095             : #endif
    3096             :                        "\n");
    3097             : #endif
    3098             : #ifdef EXTENTS_STATS
    3099             :                 spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock);
    3100             :                 EXT4_SB(sb)->s_ext_min = 1 << 30;
    3101             :                 EXT4_SB(sb)->s_ext_max = 0;
    3102             : #endif
    3103        2513 :         }
    3104        2513 : }
    3105             : 
    3106             : /*
    3107             :  * called at umount time
    3108             :  */
    3109        2513 : void ext4_ext_release(struct super_block *sb)
    3110             : {
    3111        2513 :         if (!ext4_has_feature_extents(sb))
    3112             :                 return;
    3113             : 
    3114             : #ifdef EXTENTS_STATS
    3115             :         if (EXT4_SB(sb)->s_ext_blocks && EXT4_SB(sb)->s_ext_extents) {
    3116             :                 struct ext4_sb_info *sbi = EXT4_SB(sb);
    3117             :                 printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n",
    3118             :                         sbi->s_ext_blocks, sbi->s_ext_extents,
    3119             :                         sbi->s_ext_blocks / sbi->s_ext_extents);
    3120             :                 printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n",
    3121             :                         sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max);
    3122             :         }
    3123             : #endif
    3124             : }
    3125             : 
    3126        9952 : static void ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
    3127             : {
    3128        9952 :         ext4_lblk_t  ee_block;
    3129        9952 :         ext4_fsblk_t ee_pblock;
    3130        9952 :         unsigned int ee_len;
    3131             : 
    3132        9952 :         ee_block  = le32_to_cpu(ex->ee_block);
    3133        9952 :         ee_len    = ext4_ext_get_actual_len(ex);
    3134        9952 :         ee_pblock = ext4_ext_pblock(ex);
    3135             : 
    3136        9952 :         if (ee_len == 0)
    3137             :                 return;
    3138             : 
    3139           0 :         ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
    3140             :                               EXTENT_STATUS_WRITTEN);
    3141             : }
    3142             : 
    3143             : /* FIXME!! we need to try to merge to left or right after zero-out  */
    3144           0 : static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
    3145             : {
    3146           0 :         ext4_fsblk_t ee_pblock;
    3147           0 :         unsigned int ee_len;
    3148             : 
    3149           0 :         ee_len    = ext4_ext_get_actual_len(ex);
    3150           0 :         ee_pblock = ext4_ext_pblock(ex);
    3151           0 :         return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock,
    3152             :                                   ee_len);
    3153             : }
    3154             : 
    3155             : /*
    3156             :  * ext4_split_extent_at() splits an extent at given block.
    3157             :  *
    3158             :  * @handle: the journal handle
    3159             :  * @inode: the file inode
    3160             :  * @path: the path to the extent
    3161             :  * @split: the logical block where the extent is split.
    3162             :  * @split_flag: indicates whether the extent can be zeroed out if the split
    3163             :  *              fails, and the states (init or unwritten) of the new extents.
    3164             :  * @flags: flags used to insert the new extent into the extent tree.
    3165             :  *
    3166             :  *
    3167             :  * Splits extent [a, b] into two extents [a, @split) and [@split, b], states
    3168             :  * of which are determined by split_flag.
    3169             :  *
    3170             :  * There are two cases:
    3171             :  *  a> the extent is split into two extents.
    3172             :  *  b> no split is needed, and the extent is just marked.
    3173             :  *
    3174             :  * return 0 on success.
    3175             :  */
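                     : /*
                     :  * Editor's worked example (illustrative values only): for an extent with
                     :  * ee_block = 100, ee_len = 10 and physical start 500, splitting at
                     :  * @split = 104 yields
                     :  *
                     :  *   ex : logical 100, len 4, physical 500   (first half)
                     :  *   ex2: logical 104, len 6, physical 504   (newblock = 104 - 100 + 500)
                     :  *
                     :  * which matches the "case a" arithmetic in the function body below.
                     :  */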
    3176     3238753 : static int ext4_split_extent_at(handle_t *handle,
    3177             :                              struct inode *inode,
    3178             :                              struct ext4_ext_path **ppath,
    3179             :                              ext4_lblk_t split,
    3180             :                              int split_flag,
    3181             :                              int flags)
    3182             : {
    3183     3238753 :         struct ext4_ext_path *path = *ppath;
    3184     3238753 :         ext4_fsblk_t newblock;
    3185     3238753 :         ext4_lblk_t ee_block;
    3186     3238753 :         struct ext4_extent *ex, newex, orig_ex, zero_ex;
    3187     3238753 :         struct ext4_extent *ex2 = NULL;
    3188     3238753 :         unsigned int ee_len, depth;
    3189     3238753 :         int err = 0;
    3190             : 
    3191     3238753 :         BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
    3192             :                (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
    3193             : 
    3194     3238753 :         ext_debug(inode, "logical block %llu\n", (unsigned long long)split);
    3195             : 
    3196     3238753 :         ext4_ext_show_leaf(inode, path);
    3197             : 
    3198     3238753 :         depth = ext_depth(inode);
    3199     3238753 :         ex = path[depth].p_ext;
    3200     3238753 :         ee_block = le32_to_cpu(ex->ee_block);
    3201     3238753 :         ee_len = ext4_ext_get_actual_len(ex);
    3202     3238753 :         newblock = split - ee_block + ext4_ext_pblock(ex);
    3203             : 
    3204     3238753 :         BUG_ON(split < ee_block || split >= (ee_block + ee_len));
    3205     3238753 :         BUG_ON(!ext4_ext_is_unwritten(ex) &&
    3206             :                split_flag & (EXT4_EXT_MAY_ZEROOUT |
    3207             :                              EXT4_EXT_MARK_UNWRIT1 |
    3208             :                              EXT4_EXT_MARK_UNWRIT2));
    3209             : 
    3210     3238753 :         err = ext4_ext_get_access(handle, inode, path + depth);
    3211     3238754 :         if (err)
    3212           0 :                 goto out;
    3213             : 
    3214     3238754 :         if (split == ee_block) {
    3215             :                 /*
    3216             :                  * case b: block @split is the block the extent begins with,
    3217             :                  * so we just change the state of the extent and no splitting
    3218             :                  * is needed.
    3219             :                  */
    3220      250609 :                 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
    3221      226607 :                         ext4_ext_mark_unwritten(ex);
    3222             :                 else
    3223       24002 :                         ext4_ext_mark_initialized(ex);
    3224             : 
    3225      250609 :                 if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
    3226        2245 :                         ext4_ext_try_to_merge(handle, inode, path, ex);
    3227             : 
    3228      250609 :                 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
    3229      250609 :                 goto out;
    3230             :         }
    3231             : 
    3232             :         /* case a */
    3233     2988145 :         memcpy(&orig_ex, ex, sizeof(orig_ex));
    3234     2988145 :         ex->ee_len = cpu_to_le16(split - ee_block);
    3235     2988145 :         if (split_flag & EXT4_EXT_MARK_UNWRIT1)
    3236     2741936 :                 ext4_ext_mark_unwritten(ex);
    3237             : 
    3238             :         /*
    3239             :          * the path may lead to a new leaf, not to the original leaf any more,
    3240             :          * after ext4_ext_insert_extent() returns.
    3241             :          */
    3242     2988145 :         err = ext4_ext_dirty(handle, inode, path + depth);
    3243     2988145 :         if (err)
    3244           0 :                 goto fix_extent_len;
    3245             : 
    3246     2988145 :         ex2 = &newex;
    3247     2988145 :         ex2->ee_block = cpu_to_le32(split);
    3248     2988145 :         ex2->ee_len   = cpu_to_le16(ee_len - (split - ee_block));
    3249     2988145 :         ext4_ext_store_pblock(ex2, newblock);
    3250     2988145 :         if (split_flag & EXT4_EXT_MARK_UNWRIT2)
    3251     2739552 :                 ext4_ext_mark_unwritten(ex2);
    3252             : 
    3253     2988145 :         err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
    3254     2988145 :         if (err != -ENOSPC && err != -EDQUOT && err != -ENOMEM)
    3255     2988145 :                 goto out;
    3256             : 
    3257           0 :         if (EXT4_EXT_MAY_ZEROOUT & split_flag) {
    3258           0 :                 if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
    3259           0 :                         if (split_flag & EXT4_EXT_DATA_VALID1) {
    3260           0 :                                 err = ext4_ext_zeroout(inode, ex2);
    3261           0 :                                 zero_ex.ee_block = ex2->ee_block;
    3262           0 :                                 zero_ex.ee_len = cpu_to_le16(
    3263             :                                                 ext4_ext_get_actual_len(ex2));
    3264           0 :                                 ext4_ext_store_pblock(&zero_ex,
    3265             :                                                       ext4_ext_pblock(ex2));
    3266             :                         } else {
    3267           0 :                                 err = ext4_ext_zeroout(inode, ex);
    3268           0 :                                 zero_ex.ee_block = ex->ee_block;
    3269           0 :                                 zero_ex.ee_len = cpu_to_le16(
    3270             :                                                 ext4_ext_get_actual_len(ex));
    3271           0 :                                 ext4_ext_store_pblock(&zero_ex,
    3272             :                                                       ext4_ext_pblock(ex));
    3273             :                         }
    3274             :                 } else {
    3275           0 :                         err = ext4_ext_zeroout(inode, &orig_ex);
    3276           0 :                         zero_ex.ee_block = orig_ex.ee_block;
    3277           0 :                         zero_ex.ee_len = cpu_to_le16(
    3278             :                                                 ext4_ext_get_actual_len(&orig_ex));
    3279           0 :                         ext4_ext_store_pblock(&zero_ex,
    3280             :                                               ext4_ext_pblock(&orig_ex));
    3281             :                 }
    3282             : 
    3283           0 :                 if (!err) {
    3284             :                         /* update the extent length and mark as initialized */
    3285           0 :                         ex->ee_len = cpu_to_le16(ee_len);
    3286           0 :                         ext4_ext_try_to_merge(handle, inode, path, ex);
    3287           0 :                         err = ext4_ext_dirty(handle, inode, path + path->p_depth);
    3288           0 :                         if (!err)
    3289             :                                 /* update extent status tree */
    3290           0 :                                 ext4_zeroout_es(inode, &zero_ex);
    3291             :                         /* If we failed at this point, we don't know exactly
    3292             :                          * what state the extent tree is in, so don't try to fix
    3293             :                          * the length of the original extent as it may do even
    3294             :                          * more damage.
    3295             :                          */
    3296           0 :                         goto out;
    3297             :                 }
    3298             :         }
    3299             : 
    3300           0 : fix_extent_len:
    3301           0 :         ex->ee_len = orig_ex.ee_len;
    3302             :         /*
    3303             :          * Ignore ext4_ext_dirty return value since we are already in error path
    3304             :          * and err is a non-zero error code.
    3305             :          */
    3306           0 :         ext4_ext_dirty(handle, inode, path + path->p_depth);
    3307           0 :         return err;
    3308             : out:
    3309             :         ext4_ext_show_leaf(inode, path);
    3310             :         return err;
    3311             : }
    3312             : 
    3313             : /*
    3314             :  * ext4_split_extent() splits an extent and marks the extent covered
    3315             :  * by @map as @split_flag indicates.
    3316             :  *
    3317             :  * It may result in splitting the extent into multiple extents (up to three)
    3318             :  * There are three possibilities:
    3319             :  *   a> There is no split required
    3320             :  *   b> Splits in two extents: Split is happening at either end of the extent
    3321             :  *   c> Splits in three extents: Someone is splitting in the middle of the extent
    3322             :  *
    3323             :  */
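                     : /*
                     :  * Editor's worked example of case c (illustrative values only): if the
                     :  * extent covers logical blocks 100..119 and @map covers 105..110, the
                     :  * first ext4_split_extent_at() call below splits at m_lblk + m_len = 111
                     :  * (giving 100..110 and 111..119) and the second splits at m_lblk = 105
                     :  * (giving 100..104 and 105..110), so the original extent ends up as three
                     :  * extents, with 105..110 carrying the state requested by the caller.
                     :  */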
    3324      465427 : static int ext4_split_extent(handle_t *handle,
    3325             :                               struct inode *inode,
    3326             :                               struct ext4_ext_path **ppath,
    3327             :                               struct ext4_map_blocks *map,
    3328             :                               int split_flag,
    3329             :                               int flags)
    3330             : {
    3331      465427 :         struct ext4_ext_path *path = *ppath;
    3332      465427 :         ext4_lblk_t ee_block;
    3333      465427 :         struct ext4_extent *ex;
    3334      465427 :         unsigned int ee_len, depth;
    3335      465427 :         int err = 0;
    3336      465427 :         int unwritten;
    3337      465427 :         int split_flag1, flags1;
    3338      465427 :         int allocated = map->m_len;
    3339             : 
    3340      465427 :         depth = ext_depth(inode);
    3341      465427 :         ex = path[depth].p_ext;
    3342      465427 :         ee_block = le32_to_cpu(ex->ee_block);
    3343      465427 :         ee_len = ext4_ext_get_actual_len(ex);
    3344      465427 :         unwritten = ext4_ext_is_unwritten(ex);
    3345             : 
    3346      465427 :         if (map->m_lblk + map->m_len < ee_block + ee_len) {
    3347      233532 :                 split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
    3348      233532 :                 flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
    3349      233532 :                 if (unwritten)
    3350      204879 :                         split_flag1 |= EXT4_EXT_MARK_UNWRIT1 |
    3351             :                                        EXT4_EXT_MARK_UNWRIT2;
    3352      233532 :                 if (split_flag & EXT4_EXT_DATA_VALID2)
    3353      204879 :                         split_flag1 |= EXT4_EXT_DATA_VALID1;
    3354      233532 :                 err = ext4_split_extent_at(handle, inode, ppath,
    3355             :                                 map->m_lblk + map->m_len, split_flag1, flags1);
    3356      233532 :                 if (err)
    3357           0 :                         goto out;
    3358             :         } else {
    3359      231895 :                 allocated = ee_len - (map->m_lblk - ee_block);
    3360             :         }
    3361             :         /*
    3362             :          * An updated path is required because the previous ext4_split_extent_at()
    3363             :          * may have resulted in a split of the original leaf or an extent zeroout.
    3364             :          */
    3365      465427 :         path = ext4_find_extent(inode, map->m_lblk, ppath, flags);
    3366      465429 :         if (IS_ERR(path))
    3367           0 :                 return PTR_ERR(path);
    3368      465429 :         depth = ext_depth(inode);
    3369      465429 :         ex = path[depth].p_ext;
    3370      465429 :         if (!ex) {
    3371           0 :                 EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
    3372             :                                  (unsigned long) map->m_lblk);
    3373           0 :                 return -EFSCORRUPTED;
    3374             :         }
    3375      465429 :         unwritten = ext4_ext_is_unwritten(ex);
    3376             : 
    3377      465429 :         if (map->m_lblk >= ee_block) {
    3378      465429 :                 split_flag1 = split_flag & EXT4_EXT_DATA_VALID2;
    3379      465429 :                 if (unwritten) {
    3380      412029 :                         split_flag1 |= EXT4_EXT_MARK_UNWRIT1;
    3381      412029 :                         split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
    3382             :                                                      EXT4_EXT_MARK_UNWRIT2);
    3383             :                 }
    3384      465429 :                 err = ext4_split_extent_at(handle, inode, ppath,
    3385             :                                 map->m_lblk, split_flag1, flags);
    3386      465430 :                 if (err)
    3387           0 :                         goto out;
    3388             :         }
    3389             : 
    3390      465430 :         ext4_ext_show_leaf(inode, path);
    3391      465430 : out:
    3392      465430 :         return err ? err : allocated;
    3393             : }
    3394             : 
    3395             : /*
    3396             :  * This function is called by ext4_ext_map_blocks() if someone tries to write
    3397             :  * to an unwritten extent. It may result in splitting the unwritten
    3398             :  * extent into multiple extents (up to three - one initialized and two
    3399             :  * unwritten).
    3400             :  * There are three possibilities:
    3401             :  *   a> There is no split required: Entire extent should be initialized
    3402             :  *   b> Splits in two extents: Write is happening at either end of the extent
    3403             :  *   c> Splits in three extents: Someone is writing in the middle of the extent
    3404             :  *
    3405             :  * Pre-conditions:
    3406             :  *  - The extent pointed to by 'path' is unwritten.
    3407             :  *  - The extent pointed to by 'path' contains a superset
    3408             :  *    of the logical span [map->m_lblk, map->m_lblk + map->m_len).
    3409             :  *
    3410             :  * Post-conditions on success:
    3411             :  *  - the returned value is the number of blocks beyond map->m_lblk
    3412             :  *    that are allocated and initialized.
    3413             :  *    It is guaranteed to be >= map->m_len.
    3414             :  */
    3415        4976 : static int ext4_ext_convert_to_initialized(handle_t *handle,
    3416             :                                            struct inode *inode,
    3417             :                                            struct ext4_map_blocks *map,
    3418             :                                            struct ext4_ext_path **ppath,
    3419             :                                            int flags)
    3420             : {
    3421        4976 :         struct ext4_ext_path *path = *ppath;
    3422        4976 :         struct ext4_sb_info *sbi;
    3423        4976 :         struct ext4_extent_header *eh;
    3424        4976 :         struct ext4_map_blocks split_map;
    3425        4976 :         struct ext4_extent zero_ex1, zero_ex2;
    3426        4976 :         struct ext4_extent *ex, *abut_ex;
    3427        4976 :         ext4_lblk_t ee_block, eof_block;
    3428        4976 :         unsigned int ee_len, depth, map_len = map->m_len;
    3429        4976 :         int allocated = 0, max_zeroout = 0;
    3430        4976 :         int err = 0;
    3431        4976 :         int split_flag = EXT4_EXT_DATA_VALID2;
    3432             : 
    3433        4976 :         ext_debug(inode, "logical block %llu, max_blocks %u\n",
    3434             :                   (unsigned long long)map->m_lblk, map_len);
    3435             : 
    3436        4976 :         sbi = EXT4_SB(inode->i_sb);
    3437        9952 :         eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
    3438        4976 :                         >> inode->i_sb->s_blocksize_bits;
    3439        4976 :         if (eof_block < map->m_lblk + map_len)
    3440             :                 eof_block = map->m_lblk + map_len;
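                     :         /*
                     :          * Editor's note on the round-up above (illustrative numbers): with
                     :          * 4 KiB blocks (s_blocksize_bits == 12) and i_disksize == 10000,
                     :          * eof_block = (10000 + 4096 - 1) >> 12 == 3, i.e. logical blocks
                     :          * 0..2 hold data and block 3 onwards lies past EOF, which is what
                     :          * the EXT4_EXT_MAY_ZEROOUT checks below rely on.
                     :          */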
    3441             : 
    3442        4976 :         depth = ext_depth(inode);
    3443        4976 :         eh = path[depth].p_hdr;
    3444        4976 :         ex = path[depth].p_ext;
    3445        4976 :         ee_block = le32_to_cpu(ex->ee_block);
    3446        4976 :         ee_len = ext4_ext_get_actual_len(ex);
    3447        4976 :         zero_ex1.ee_len = 0;
    3448        4976 :         zero_ex2.ee_len = 0;
    3449             : 
    3450        4976 :         trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
    3451             : 
    3452             :         /* Pre-conditions */
    3453        4976 :         BUG_ON(!ext4_ext_is_unwritten(ex));
    3454        4976 :         BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));
    3455             : 
    3456             :         /*
    3457             :          * Attempt to transfer newly initialized blocks from the currently
    3458             :          * unwritten extent to its neighbor. This is much cheaper
    3459             :          * than an insertion followed by a merge as those involve costly
    3460             :          * memmove() calls. Transferring to the left is the common case in
    3461             :          * steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE)
    3462             :          * followed by append writes.
    3463             :          *
    3464             :          * Limitations of the current logic:
    3465             :          *  - L1: we do not deal with writes covering the whole extent.
    3466             :          *    This would require removing the extent if the transfer
    3467             :          *    is possible.
    3468             :          *  - L2: we only attempt to merge with an extent stored in the
    3469             :          *    same extent tree node.
    3470             :          */
    3471        4976 :         if ((map->m_lblk == ee_block) &&
    3472             :                 /* See if we can merge left */
    3473        1616 :                 (map_len < ee_len) &&                /*L1*/
    3474        1616 :                 (ex > EXT_FIRST_EXTENT(eh))) {       /*L2*/
    3475        1613 :                 ext4_lblk_t prev_lblk;
    3476        1613 :                 ext4_fsblk_t prev_pblk, ee_pblk;
    3477        1613 :                 unsigned int prev_len;
    3478             : 
    3479        1613 :                 abut_ex = ex - 1;
    3480        1613 :                 prev_lblk = le32_to_cpu(abut_ex->ee_block);
    3481        1613 :                 prev_len = ext4_ext_get_actual_len(abut_ex);
    3482        1613 :                 prev_pblk = ext4_ext_pblock(abut_ex);
    3483        1613 :                 ee_pblk = ext4_ext_pblock(ex);
    3484             : 
    3485             :                 /*
    3486             :                  * A transfer of blocks from 'ex' to 'abut_ex' is allowed
    3487             :                  * upon those conditions:
    3488             :                  * - C1: abut_ex is initialized,
    3489             :                  * - C2: abut_ex is logically abutting ex,
    3490             :                  * - C3: abut_ex is physically abutting ex,
    3491             :                  * - C4: abut_ex can receive the additional blocks without
    3492             :                  *   overflowing the (initialized) length limit.
    3493             :                  */
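                     :                 /*
                     :                  * Editor's worked example (illustrative values only):
                     :                  * abut_ex = [lblk 100, len 8, initialized, pblk 500] and
                     :                  * ex = [lblk 108, len 12, unwritten, pblk 508] satisfy
                     :                  * C1-C4 for a write of map_len = 4 starting at lblk 108;
                     :                  * the code below turns them into
                     :                  * abut_ex = [100, len 12, pblk 500] and
                     :                  * ex = [112, len 8, unwritten, pblk 512].
                     :                  */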
    3494        1613 :                 if ((!ext4_ext_is_unwritten(abut_ex)) &&                /*C1*/
    3495        1386 :                         ((prev_lblk + prev_len) == ee_block) &&         /*C2*/
    3496        1347 :                         ((prev_pblk + prev_len) == ee_pblk) &&          /*C3*/
    3497         347 :                         (prev_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/
    3498         347 :                         err = ext4_ext_get_access(handle, inode, path + depth);
    3499         347 :                         if (err)
    3500           0 :                                 goto out;
    3501             : 
    3502         347 :                         trace_ext4_ext_convert_to_initialized_fastpath(inode,
    3503             :                                 map, ex, abut_ex);
    3504             : 
    3505             :                         /* Shift the start of ex by 'map_len' blocks */
    3506         347 :                         ex->ee_block = cpu_to_le32(ee_block + map_len);
    3507         347 :                         ext4_ext_store_pblock(ex, ee_pblk + map_len);
    3508         347 :                         ex->ee_len = cpu_to_le16(ee_len - map_len);
    3509         347 :                         ext4_ext_mark_unwritten(ex); /* Restore the flag */
    3510             : 
    3511             :                         /* Extend abut_ex by 'map_len' blocks */
    3512         347 :                         abut_ex->ee_len = cpu_to_le16(prev_len + map_len);
    3513             : 
    3514             :                         /* Result: number of initialized blocks past m_lblk */
    3515         347 :                         allocated = map_len;
    3516             :                 }
    3517        3363 :         } else if (((map->m_lblk + map_len) == (ee_block + ee_len)) &&
    3518         960 :                    (map_len < ee_len) &&     /*L1*/
    3519         960 :                    ex < EXT_LAST_EXTENT(eh)) {       /*L2*/
    3520             :                 /* See if we can merge right */
    3521         188 :                 ext4_lblk_t next_lblk;
    3522         188 :                 ext4_fsblk_t next_pblk, ee_pblk;
    3523         188 :                 unsigned int next_len;
    3524             : 
    3525         188 :                 abut_ex = ex + 1;
    3526         188 :                 next_lblk = le32_to_cpu(abut_ex->ee_block);
    3527         188 :                 next_len = ext4_ext_get_actual_len(abut_ex);
    3528         188 :                 next_pblk = ext4_ext_pblock(abut_ex);
    3529         188 :                 ee_pblk = ext4_ext_pblock(ex);
    3530             : 
    3531             :                 /*
    3532             :                  * A transfer of blocks from 'ex' to 'abut_ex' is allowed
    3533             :                  * upon those conditions:
    3534             :                  * - C1: abut_ex is initialized,
    3535             :                  * - C2: abut_ex is logically abutting ex,
    3536             :                  * - C3: abut_ex is physically abutting ex,
    3537             :                  * - C4: abut_ex can receive the additional blocks without
    3538             :                  *   overflowing the (initialized) length limit.
    3539             :                  */
    3540         188 :                 if ((!ext4_ext_is_unwritten(abut_ex)) &&                /*C1*/
    3541           0 :                     ((map->m_lblk + map_len) == next_lblk) &&                /*C2*/
    3542           0 :                     ((ee_pblk + ee_len) == next_pblk) &&                /*C3*/
    3543           0 :                     (next_len < (EXT_INIT_MAX_LEN - map_len))) {     /*C4*/
    3544           0 :                         err = ext4_ext_get_access(handle, inode, path + depth);
    3545           0 :                         if (err)
    3546           0 :                                 goto out;
    3547             : 
    3548           0 :                         trace_ext4_ext_convert_to_initialized_fastpath(inode,
    3549             :                                 map, ex, abut_ex);
    3550             : 
    3551             :                         /* Shift the start of abut_ex by 'map_len' blocks */
    3552           0 :                         abut_ex->ee_block = cpu_to_le32(next_lblk - map_len);
    3553           0 :                         ext4_ext_store_pblock(abut_ex, next_pblk - map_len);
    3554           0 :                         ex->ee_len = cpu_to_le16(ee_len - map_len);
    3555           0 :                         ext4_ext_mark_unwritten(ex); /* Restore the flag */
    3556             : 
    3557             :                         /* Extend abut_ex by 'map_len' blocks */
    3558           0 :                         abut_ex->ee_len = cpu_to_le16(next_len + map_len);
    3559             : 
    3560             :                         /* Result: number of initialized blocks past m_lblk */
    3561           0 :                         allocated = map_len;
    3562             :                 }
    3563             :         }
    3564         347 :         if (allocated) {
    3565             :                 /* Mark the block containing both extents as dirty */
    3566         347 :                 err = ext4_ext_dirty(handle, inode, path + depth);
    3567             : 
    3568             :                 /* Update path to point to the right extent */
    3569         347 :                 path[depth].p_ext = abut_ex;
    3570         347 :                 goto out;
    3571             :         } else
    3572        4629 :                 allocated = ee_len - (map->m_lblk - ee_block);
    3573             : 
    3574        4629 :         WARN_ON(map->m_lblk < ee_block);
    3575             :         /*
    3576             :          * It is safe to convert extent to initialized via explicit
    3577             :          * zeroout only if extent is fully inside i_size or new_size.
    3578             :          */
    3579        4629 :         split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
    3580             : 
    3581        4629 :         if (EXT4_EXT_MAY_ZEROOUT & split_flag)
    3582        1936 :                 max_zeroout = sbi->s_extent_max_zeroout_kb >>
    3583        1936 :                         (inode->i_sb->s_blocksize_bits - 10);
    3584             : 
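                     :         /*
                     :          * Editor's note (illustrative): the shift converts the sysfs limit
                     :          * from KiB to filesystem blocks.  Assuming the common default of
                     :          * s_extent_max_zeroout_kb == 32 and 4 KiB blocks, max_zeroout is
                     :          * 32 >> (12 - 10) == 8 blocks; ranges no longer than that may be
                     :          * zeroed out instead of split.
                     :          */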
    3585             :         /*
    3586             :          * five cases:
    3587             :          * 1. split the extent into three extents.
    3588             :          * 2. split the extent into two extents, zeroout the head of the first
    3589             :          *    extent.
    3590             :          * 3. split the extent into two extents, zeroout the tail of the second
    3591             :          *    extent.
    3592             :          * 4. split the extent into two extents without zeroout.
    3593             :          * 5. no splitting needed, just possibly zeroout the head and / or the
    3594             :          *    tail of the extent.
    3595             :          */
    3596        4629 :         split_map.m_lblk = map->m_lblk;
    3597        4629 :         split_map.m_len = map->m_len;
    3598             : 
    3599        4629 :         if (max_zeroout && (allocated > split_map.m_len)) {
    3600           0 :                 if (allocated <= max_zeroout) {
    3601             :                         /* case 3 or 5 */
    3602           0 :                         zero_ex1.ee_block =
    3603           0 :                                  cpu_to_le32(split_map.m_lblk +
    3604             :                                              split_map.m_len);
    3605           0 :                         zero_ex1.ee_len =
    3606           0 :                                 cpu_to_le16(allocated - split_map.m_len);
    3607           0 :                         ext4_ext_store_pblock(&zero_ex1,
    3608           0 :                                 ext4_ext_pblock(ex) + split_map.m_lblk +
    3609           0 :                                 split_map.m_len - ee_block);
    3610           0 :                         err = ext4_ext_zeroout(inode, &zero_ex1);
    3611           0 :                         if (err)
    3612           0 :                                 goto fallback;
    3613           0 :                         split_map.m_len = allocated;
    3614             :                 }
    3615           0 :                 if (split_map.m_lblk - ee_block + split_map.m_len <
    3616             :                                                                 max_zeroout) {
    3617             :                         /* case 2 or 5 */
    3618           0 :                         if (split_map.m_lblk != ee_block) {
    3619           0 :                                 zero_ex2.ee_block = ex->ee_block;
    3620           0 :                                 zero_ex2.ee_len = cpu_to_le16(split_map.m_lblk -
    3621             :                                                         ee_block);
    3622           0 :                                 ext4_ext_store_pblock(&zero_ex2,
    3623             :                                                       ext4_ext_pblock(ex));
    3624           0 :                                 err = ext4_ext_zeroout(inode, &zero_ex2);
    3625           0 :                                 if (err)
    3626           0 :                                         goto fallback;
    3627             :                         }
    3628             : 
    3629           0 :                         split_map.m_len += split_map.m_lblk - ee_block;
    3630           0 :                         split_map.m_lblk = ee_block;
    3631           0 :                         allocated = map->m_len;
    3632             :                 }
    3633             :         }
    3634             : 
    3635        4629 : fallback:
    3636        4629 :         err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag,
    3637             :                                 flags);
    3638        4629 :         if (err > 0)
    3639             :                 err = 0;
    3640           0 : out:
    3641             :         /* If we have gotten a failure, don't zero out status tree */
    3642         347 :         if (!err) {
    3643        4976 :                 ext4_zeroout_es(inode, &zero_ex1);
    3644        4976 :                 ext4_zeroout_es(inode, &zero_ex2);
    3645             :         }
    3646        4976 :         return err ? err : allocated;
    3647             : }
    3648             : 
    3649             : /*
    3650             :  * This function is called by ext4_ext_map_blocks() from
    3651             :  * ext4_get_blocks_dio_write() when a DIO write targets
    3652             :  * an unwritten extent.
    3653             :  *
    3654             :  * Writing to an unwritten extent may result in splitting the unwritten
    3655             :  * extent into multiple initialized/unwritten extents (up to three)
    3656             :  * There are three possibilities:
    3657             :  *   a> There is no split required: Entire extent should be unwritten
    3658             :  *   b> Splits in two extents: Write is happening at either end of the extent
    3659             :  *   c> Splits in three extents: Someone is writing in the middle of the extent
    3660             :  *
    3661             :  * This works the same way in the case of initialized -> unwritten conversion.
    3662             :  *
    3663             :  * One or more index blocks may be needed if the extent tree grows after
    3664             :  * the unwritten extent is split. To prevent ENOSPC from occurring at IO
    3665             :  * completion, we need to split the unwritten extent before the IO is
    3666             :  * submitted. The unwritten extent passed in at this time will be split
    3667             :  * into (at most) three unwritten extents. After the IO completes, the part
    3668             :  * that was written will be converted to initialized by the end_io callback
    3669             :  * via ext4_convert_unwritten_extents().
    3670             :  *
    3671             :  * Returns the size of unwritten extent to be written on success.
    3672             :  */
    3673      460798 : static int ext4_split_convert_extents(handle_t *handle,
    3674             :                                         struct inode *inode,
    3675             :                                         struct ext4_map_blocks *map,
    3676             :                                         struct ext4_ext_path **ppath,
    3677             :                                         int flags)
    3678             : {
    3679      460798 :         struct ext4_ext_path *path = *ppath;
    3680      460798 :         ext4_lblk_t eof_block;
    3681      460798 :         ext4_lblk_t ee_block;
    3682      460798 :         struct ext4_extent *ex;
    3683      460798 :         unsigned int ee_len;
    3684      460798 :         int split_flag = 0, depth;
    3685             : 
    3686      460798 :         ext_debug(inode, "logical block %llu, max_blocks %u\n",
    3687             :                   (unsigned long long)map->m_lblk, map->m_len);
    3688             : 
    3689      921596 :         eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
    3690      460798 :                         >> inode->i_sb->s_blocksize_bits;
    3691      460798 :         if (eof_block < map->m_lblk + map->m_len)
    3692             :                 eof_block = map->m_lblk + map->m_len;
    3693             :         /*
    3694             :          * It is safe to convert extent to initialized via explicit
    3695             :          * zeroout only if extent is fully inside i_size or new_size.
    3696             :          */
    3697      460798 :         depth = ext_depth(inode);
    3698      460798 :         ex = path[depth].p_ext;
    3699      460798 :         ee_block = le32_to_cpu(ex->ee_block);
    3700      460798 :         ee_len = ext4_ext_get_actual_len(ex);
    3701             : 
    3702             :         /* Convert to unwritten */
    3703      460798 :         if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) {
    3704             :                 split_flag |= EXT4_EXT_DATA_VALID1;
    3705             :         /* Convert to initialized */
    3706      407399 :         } else if (flags & EXT4_GET_BLOCKS_CONVERT) {
    3707      407399 :                 split_flag |= ee_block + ee_len <= eof_block ?
    3708      407399 :                               EXT4_EXT_MAY_ZEROOUT : 0;
    3709      407399 :                 split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2);
    3710             :         }
    3711      460798 :         flags |= EXT4_GET_BLOCKS_PRE_IO;
    3712      460798 :         return ext4_split_extent(handle, inode, ppath, map, split_flag, flags);
    3713             : }
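As a concrete illustration of case c> from the comment above (a write landing strictly inside an unwritten extent), the split reduces to simple logical-block arithmetic. The helper below is a hypothetical, self-contained sketch: none of these names exist in extents.c, only the arithmetic mirrors the pieces the split ends up producing.

/*
 * Hypothetical sketch: the three pieces produced when a write of m_len
 * blocks at m_lblk falls strictly inside an unwritten extent that starts
 * at ee_block and spans ee_len blocks.  Pieces 0 and 2 stay unwritten;
 * piece 1 is the part the IO will later convert to initialized.
 */
struct piece { unsigned int lblk; unsigned int len; };

static void split_three_ways(unsigned int ee_block, unsigned int ee_len,
                             unsigned int m_lblk, unsigned int m_len,
                             struct piece out[3])
{
        out[0].lblk = ee_block;                         /* left, unwritten   */
        out[0].len  = m_lblk - ee_block;
        out[1].lblk = m_lblk;                           /* written by the IO */
        out[1].len  = m_len;
        out[2].lblk = m_lblk + m_len;                   /* right, unwritten  */
        out[2].len  = ee_block + ee_len - (m_lblk + m_len);
}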
    3714             : 
    3715     1643796 : static int ext4_convert_unwritten_extents_endio(handle_t *handle,
    3716             :                                                 struct inode *inode,
    3717             :                                                 struct ext4_map_blocks *map,
    3718             :                                                 struct ext4_ext_path **ppath)
    3719             : {
    3720     1643796 :         struct ext4_ext_path *path = *ppath;
    3721     1643796 :         struct ext4_extent *ex;
    3722     1643796 :         ext4_lblk_t ee_block;
    3723     1643796 :         unsigned int ee_len;
    3724     1643796 :         int depth;
    3725     1643796 :         int err = 0;
    3726             : 
    3727     1643796 :         depth = ext_depth(inode);
    3728     1643796 :         ex = path[depth].p_ext;
    3729     1643796 :         ee_block = le32_to_cpu(ex->ee_block);
    3730     1643796 :         ee_len = ext4_ext_get_actual_len(ex);
    3731             : 
    3732     1643796 :         ext_debug(inode, "logical block %llu, max_blocks %u\n",
    3733             :                   (unsigned long long)ee_block, ee_len);
    3734             : 
    3735             :         /* If the extent is larger than requested, it is a clear sign that we
    3736             :          * still have some extent state machine issues left, so an extent split
    3737             :          * is still required.
    3738             :          * TODO: Once all related issues are fixed, this situation should be
    3739             :          * illegal.
    3740             :          */
    3741     1643796 :         if (ee_block != map->m_lblk || ee_len > map->m_len) {
    3742             : #ifdef CONFIG_EXT4_DEBUG
    3743          20 :                 ext4_warning(inode->i_sb, "Inode (%ld) finished: extent logical block %llu,"
    3744             :                              " len %u; IO logical block %llu, len %u",
    3745             :                              inode->i_ino, (unsigned long long)ee_block, ee_len,
    3746             :                              (unsigned long long)map->m_lblk, map->m_len);
    3747             : #endif
    3748          22 :                 err = ext4_split_convert_extents(handle, inode, map, ppath,
    3749             :                                                  EXT4_GET_BLOCKS_CONVERT);
    3750          22 :                 if (err < 0)
    3751             :                         return err;
    3752          22 :                 path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
    3753          22 :                 if (IS_ERR(path))
    3754           0 :                         return PTR_ERR(path);
    3755          22 :                 depth = ext_depth(inode);
    3756          22 :                 ex = path[depth].p_ext;
    3757             :         }
    3758             : 
    3759     1643798 :         err = ext4_ext_get_access(handle, inode, path + depth);
    3760     1643815 :         if (err)
    3761           0 :                 goto out;
    3762             :         /* first mark the extent as initialized */
    3763     1643815 :         ext4_ext_mark_initialized(ex);
    3764             : 
    3765             :         /* note: ext4_ext_correct_indexes() isn't needed here because
    3766             :          * borders are not changed
    3767             :          */
    3768     1643815 :         ext4_ext_try_to_merge(handle, inode, path, ex);
    3769             : 
    3770             :         /* Mark modified extent as dirty */
    3771     1643802 :         err = ext4_ext_dirty(handle, inode, path + path->p_depth);
    3772             : out:
    3773             :         ext4_ext_show_leaf(inode, path);
    3774             :         return err;
    3775             : }
    3776             : 
    3777             : static int
    3778      122030 : convert_initialized_extent(handle_t *handle, struct inode *inode,
    3779             :                            struct ext4_map_blocks *map,
    3780             :                            struct ext4_ext_path **ppath,
    3781             :                            unsigned int *allocated)
    3782             : {
    3783      122030 :         struct ext4_ext_path *path = *ppath;
    3784      122030 :         struct ext4_extent *ex;
    3785      122030 :         ext4_lblk_t ee_block;
    3786      122030 :         unsigned int ee_len;
    3787      122030 :         int depth;
    3788      122030 :         int err = 0;
    3789             : 
    3790             :         /*
    3791             :          * Make sure that the extent is no bigger than we support with
    3792             :          * unwritten extent
    3793             :          */
    3794      122030 :         if (map->m_len > EXT_UNWRITTEN_MAX_LEN)
    3795           0 :                 map->m_len = EXT_UNWRITTEN_MAX_LEN / 2;
    3796             : 
    3797      122030 :         depth = ext_depth(inode);
    3798      122030 :         ex = path[depth].p_ext;
    3799      122030 :         ee_block = le32_to_cpu(ex->ee_block);
    3800      122030 :         ee_len = ext4_ext_get_actual_len(ex);
    3801             : 
    3802      122030 :         ext_debug(inode, "logical block %llu, max_blocks %u\n",
    3803             :                   (unsigned long long)ee_block, ee_len);
    3804             : 
    3805      122030 :         if (ee_block != map->m_lblk || ee_len > map->m_len) {
    3806       53399 :                 err = ext4_split_convert_extents(handle, inode, map, ppath,
    3807             :                                 EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
    3808       53399 :                 if (err < 0)
    3809             :                         return err;
    3810       53399 :                 path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
    3811       53399 :                 if (IS_ERR(path))
    3812           0 :                         return PTR_ERR(path);
    3813       53399 :                 depth = ext_depth(inode);
    3814       53399 :                 ex = path[depth].p_ext;
    3815       53399 :                 if (!ex) {
    3816           0 :                         EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
    3817             :                                          (unsigned long) map->m_lblk);
    3818           0 :                         return -EFSCORRUPTED;
    3819             :                 }
    3820             :         }
    3821             : 
    3822      122030 :         err = ext4_ext_get_access(handle, inode, path + depth);
    3823      122030 :         if (err)
    3824             :                 return err;
    3825             :         /* first mark the extent as unwritten */
    3826      122030 :         ext4_ext_mark_unwritten(ex);
    3827             : 
    3828             :         /* note: ext4_ext_correct_indexes() isn't needed here because
    3829             :          * borders are not changed
    3830             :          */
    3831      122030 :         ext4_ext_try_to_merge(handle, inode, path, ex);
    3832             : 
    3833             :         /* Mark modified extent as dirty */
    3834      122030 :         err = ext4_ext_dirty(handle, inode, path + path->p_depth);
    3835      122030 :         if (err)
    3836             :                 return err;
    3837      122030 :         ext4_ext_show_leaf(inode, path);
    3838             : 
    3839      122030 :         ext4_update_inode_fsync_trans(handle, inode, 1);
    3840             : 
    3841      122030 :         map->m_flags |= EXT4_MAP_UNWRITTEN;
    3842      122030 :         if (*allocated > map->m_len)
    3843       28653 :                 *allocated = map->m_len;
    3844      122030 :         map->m_len = *allocated;
    3845      122030 :         return 0;
    3846             : }
    3847             : 
    3848             : static int
    3849     3003633 : ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
    3850             :                         struct ext4_map_blocks *map,
    3851             :                         struct ext4_ext_path **ppath, int flags,
    3852             :                         unsigned int allocated, ext4_fsblk_t newblock)
    3853             : {
    3854     3003633 :         struct ext4_ext_path __maybe_unused *path = *ppath;
    3855     3003633 :         int ret = 0;
    3856     3003633 :         int err = 0;
    3857             : 
    3858     3003633 :         ext_debug(inode, "logical block %llu, max_blocks %u, flags 0x%x, allocated %u\n",
    3859             :                   (unsigned long long)map->m_lblk, map->m_len, flags,
    3860             :                   allocated);
    3861     3003633 :         ext4_ext_show_leaf(inode, path);
    3862             : 
    3863             :         /*
    3864             :          * When writing into unwritten space, we should not fail to
    3865             :          * allocate metadata blocks for the new extent block if needed.
    3866             :          */
    3867     3003633 :         flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL;
    3868             : 
    3869     3003633 :         trace_ext4_ext_handle_unwritten_extents(inode, map, flags,
    3870             :                                                     allocated, newblock);
    3871             : 
    3872             :         /* get_block() before submitting IO, split the extent */
    3873     3003647 :         if (flags & EXT4_GET_BLOCKS_PRE_IO) {
    3874      407376 :                 ret = ext4_split_convert_extents(handle, inode, map, ppath,
    3875             :                                          flags | EXT4_GET_BLOCKS_CONVERT);
    3876      407377 :                 if (ret < 0) {
    3877           0 :                         err = ret;
    3878           0 :                         goto out2;
    3879             :                 }
    3880             :                 /*
    3881             :                  * shouldn't get a 0 return when splitting an extent unless
    3882             :                  * m_len is 0 (bug) or extent has been corrupted
    3883             :                  */
    3884      407377 :                 if (unlikely(ret == 0)) {
    3885           0 :                         EXT4_ERROR_INODE(inode,
    3886             :                                          "unexpected ret == 0, m_len = %u",
    3887             :                                          map->m_len);
    3888           0 :                         err = -EFSCORRUPTED;
    3889           0 :                         goto out2;
    3890             :                 }
    3891      407377 :                 map->m_flags |= EXT4_MAP_UNWRITTEN;
    3892      407377 :                 goto out;
    3893             :         }
    3894             :         /* IO end_io complete, convert the filled extent to written */
    3895     2596271 :         if (flags & EXT4_GET_BLOCKS_CONVERT) {
    3896     1643799 :                 err = ext4_convert_unwritten_extents_endio(handle, inode, map,
    3897             :                                                            ppath);
    3898     1643810 :                 if (err < 0)
    3899           0 :                         goto out2;
    3900     1643810 :                 ext4_update_inode_fsync_trans(handle, inode, 1);
    3901     1643801 :                 goto map_out;
    3902             :         }
    3903             :         /* buffered IO cases */
    3904             :         /*
    3905             :          * repeat fallocate creation request
    3906             :          * we already have an unwritten extent
    3907             :          */
    3908      952472 :         if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {
    3909      564678 :                 map->m_flags |= EXT4_MAP_UNWRITTEN;
    3910      564678 :                 goto map_out;
    3911             :         }
    3912             : 
    3913             :         /* buffered READ or buffered write_begin() lookup */
    3914      387794 :         if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
    3915             :                 /*
    3916             :                  * We have blocks reserved already.  We
    3917             :                  * return allocated blocks so that delalloc
    3918             :                  * won't do block reservation for us.  But
    3919             :                  * the buffer head will be unmapped so that
    3920             :                  * a read from the block returns 0s.
    3921             :                  */
    3922      382818 :                 map->m_flags |= EXT4_MAP_UNWRITTEN;
    3923      382818 :                 goto out1;
    3924             :         }
    3925             : 
    3926             :         /*
    3927             :          * Default case when (flags & EXT4_GET_BLOCKS_CREATE) == 1.
    3928             :          * For buffered writes, at writepage time, etc.  Convert a
    3929             :          * discovered unwritten extent to written.
    3930             :          */
    3931        4976 :         ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags);
    3932        4976 :         if (ret < 0) {
    3933           0 :                 err = ret;
    3934           0 :                 goto out2;
    3935             :         }
    3936        4976 :         ext4_update_inode_fsync_trans(handle, inode, 1);
    3937             :         /*
    3938             :          * shouldn't get a 0 return when converting an unwritten extent
    3939             :          * unless m_len is 0 (bug) or extent has been corrupted
    3940             :          */
    3941        4976 :         if (unlikely(ret == 0)) {
    3942           0 :                 EXT4_ERROR_INODE(inode, "unexpected ret == 0, m_len = %u",
    3943             :                                  map->m_len);
    3944           0 :                 err = -EFSCORRUPTED;
    3945           0 :                 goto out2;
    3946             :         }
    3947             : 
    3948        4976 : out:
    3949      412353 :         allocated = ret;
    3950      412353 :         map->m_flags |= EXT4_MAP_NEW;
    3951     2620832 : map_out:
    3952     2620832 :         map->m_flags |= EXT4_MAP_MAPPED;
    3953     3003650 : out1:
    3954     3003650 :         map->m_pblk = newblock;
    3955     3003650 :         if (allocated > map->m_len)
    3956             :                 allocated = map->m_len;
    3957     3003650 :         map->m_len = allocated;
    3958     3003650 :         ext4_ext_show_leaf(inode, path);
    3959     3003650 : out2:
    3960     3003650 :         return err ? err : allocated;
    3961             : }
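The function above is essentially a dispatcher on the get_blocks flags: split before the IO is submitted, convert at end_io time, report an already-unwritten extent for a repeated fallocate, do a plain lookup, or fall through to an immediate conversion for buffered writes. A condensed sketch of that decision order, assuming only the flag names used above (the helper itself is illustrative and not part of extents.c):

/* Illustrative summary of the dispatch performed above; the strings only
 * describe the action taken, they are not kernel messages. */
static const char *unwritten_action(int flags)
{
        if (flags & EXT4_GET_BLOCKS_PRE_IO)
                return "split before IO submission, pieces stay unwritten";
        if (flags & EXT4_GET_BLOCKS_CONVERT)
                return "end_io: convert the filled extent to written";
        if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT)
                return "repeated fallocate: extent already unwritten, just map";
        if (!(flags & EXT4_GET_BLOCKS_CREATE))
                return "lookup only: report unwritten, leave buffer unmapped";
        return "buffered write: convert to initialized now";
}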
    3962             : 
    3963             : /*
    3964             :  * get_implied_cluster_alloc - check to see if the requested
    3965             :  * allocation (in the map structure) overlaps with a cluster already
    3966             :  * allocated in an extent.
    3967             :  *      @sb     The filesystem superblock structure
    3968             :  *      @map    The requested lblk->pblk mapping
    3969             :  *      @ex     The extent structure which might contain an implied
    3970             :  *                      cluster allocation
    3971             :  *
    3972             :  * This function is called by ext4_ext_map_blocks() after we failed to
    3973             :  * find blocks that were already in the inode's extent tree.  Hence,
    3974             :  * we know that the beginning of the requested region cannot overlap
    3975             :  * the extent from the inode's extent tree.  There are three cases we
    3976             :  * want to catch.  The first is this case:
    3977             :  *
    3978             :  *               |--- cluster # N--|
    3979             :  *    |--- extent ---|  |---- requested region ---|
    3980             :  *                      |==========|
    3981             :  *
    3982             :  * The second case that we need to test for is this one:
    3983             :  *
    3984             :  *   |--------- cluster # N ----------------|
    3985             :  *         |--- requested region --|   |------- extent ----|
    3986             :  *         |=======================|
    3987             :  *
    3988             :  * The third case is when the requested region lies between two extents
    3989             :  * within the same cluster:
    3990             :  *          |------------- cluster # N-------------|
    3991             :  * |----- ex -----|                  |---- ex_right ----|
    3992             :  *                  |------ requested region ------|
    3993             :  *                  |================|
    3994             :  *
    3995             :  * In each of the above cases, we need to set map->m_pblk and
    3996             :  * map->m_len so that they correspond to the portion of the extent
    3997             :  * labelled as "|====|" from cluster #N, since it is already in use for
    3998             :  * data in cluster EXT4_B2C(sbi, map->m_lblk).  We will then return 1 to
    3999             :  * signal to ext4_ext_map_blocks() that map->m_pblk should be treated
    4000             :  * as a new "allocated" block region.  Otherwise, we will return 0 and
    4001             :  * ext4_ext_map_blocks() will then allocate one or more new clusters
    4002             :  * by calling ext4_mb_new_blocks().
    4003             :  */
    4004           1 : static int get_implied_cluster_alloc(struct super_block *sb,
    4005             :                                      struct ext4_map_blocks *map,
    4006             :                                      struct ext4_extent *ex,
    4007             :                                      struct ext4_ext_path *path)
    4008             : {
    4009           1 :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    4010           1 :         ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
    4011           1 :         ext4_lblk_t ex_cluster_start, ex_cluster_end;
    4012           1 :         ext4_lblk_t rr_cluster_start;
    4013           1 :         ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
    4014           1 :         ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
    4015           1 :         unsigned short ee_len = ext4_ext_get_actual_len(ex);
    4016             : 
    4017             :         /* The extent passed in that we are trying to match */
    4018           1 :         ex_cluster_start = EXT4_B2C(sbi, ee_block);
    4019           1 :         ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1);
    4020             : 
    4021             :         /* The requested region passed into ext4_map_blocks() */
    4022           1 :         rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
    4023             : 
    4024           1 :         if ((rr_cluster_start == ex_cluster_end) ||
    4025           1 :             (rr_cluster_start == ex_cluster_start)) {
    4026           1 :                 if (rr_cluster_start == ex_cluster_end)
    4027           1 :                         ee_start += ee_len - 1;
    4028           1 :                 map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset;
    4029           1 :                 map->m_len = min(map->m_len,
    4030             :                                  (unsigned) sbi->s_cluster_ratio - c_offset);
    4031             :                 /*
    4032             :                  * Check for and handle this case:
    4033             :                  *
    4034             :                  *   |--------- cluster # N-------------|
    4035             :                  *                     |------- extent ----|
    4036             :                  *         |--- requested region ---|
    4037             :                  *         |===========|
    4038             :                  */
    4039             : 
    4040           1 :                 if (map->m_lblk < ee_block)
    4041           0 :                         map->m_len = min(map->m_len, ee_block - map->m_lblk);
    4042             : 
    4043             :                 /*
    4044             :                  * Check for the case where there is already another allocated
    4045             :                  * block to the right of 'ex' but before the end of the cluster.
    4046             :                  *
    4047             :                  *          |------------- cluster # N-------------|
    4048             :                  * |----- ex -----|                  |---- ex_right ----|
    4049             :                  *                  |------ requested region ------|
    4050             :                  *                  |================|
    4051             :                  */
    4052           1 :                 if (map->m_lblk > ee_block) {
    4053           1 :                         ext4_lblk_t next = ext4_ext_next_allocated_block(path);
    4054           1 :                         map->m_len = min(map->m_len, next - map->m_lblk);
    4055             :                 }
    4056             : 
    4057           1 :                 trace_ext4_get_implied_cluster_alloc_exit(sb, map, 1);
    4058           1 :                 return 1;
    4059             :         }
    4060             : 
    4061           0 :         trace_ext4_get_implied_cluster_alloc_exit(sb, map, 0);
    4062           0 :         return 0;
    4063             : }
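The cluster diagrams above all come down to shift-and-mask arithmetic on the cluster ratio (blocks per cluster, a power of two when bigalloc is enabled). The real EXT4_B2C(), EXT4_LBLK_COFF() and EXT4_PBLK_CMASK() macros take the superblock info; the standalone approximations below are only meant to make that arithmetic explicit and are not drop-in replacements.

/* Simplified stand-ins for the bigalloc helpers used above, assuming
 * cluster_ratio == 1 << cluster_bits (blocks per cluster). */
static inline unsigned int b2c(unsigned int blk, unsigned int cluster_bits)
{
        return blk >> cluster_bits;             /* block -> cluster number   */
}

static inline unsigned int lblk_coff(unsigned int lblk, unsigned int cluster_ratio)
{
        return lblk & (cluster_ratio - 1);      /* offset inside the cluster */
}

static inline unsigned long long pblk_cmask(unsigned long long pblk,
                                            unsigned int cluster_ratio)
{
        return pblk & ~((unsigned long long)cluster_ratio - 1); /* cluster start */
}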
    4064             : 
    4065             : 
    4066             : /*
    4067             :  * Block allocation/map/preallocation routine for extents based files
    4068             :  *
    4069             :  *
    4070             :  * Needs to be called with
    4071             :  * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system blocks
    4072             :  * (i.e., create is zero); otherwise with down_write(&EXT4_I(inode)->i_data_sem).
    4073             :  *
    4074             :  * return > 0, number of blocks already mapped/allocated
    4075             :  *          if create == 0 and these are pre-allocated blocks
    4076             :  *              buffer head is unmapped
    4077             :  *          otherwise blocks are mapped
    4078             :  *
    4079             :  * return = 0, if plain look up failed (blocks have not been allocated)
    4080             :  *          buffer head is unmapped
    4081             :  *
    4082             :  * return < 0, error case.
    4083             :  */
    4084    11149522 : int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
    4085             :                         struct ext4_map_blocks *map, int flags)
    4086             : {
    4087    11149522 :         struct ext4_ext_path *path = NULL;
    4088    11149522 :         struct ext4_extent newex, *ex, ex2;
    4089    11149522 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    4090    11149522 :         ext4_fsblk_t newblock = 0, pblk;
    4091    11149522 :         int err = 0, depth, ret;
    4092    11149522 :         unsigned int allocated = 0, offset = 0;
    4093    11149522 :         unsigned int allocated_clusters = 0;
    4094    11149522 :         struct ext4_allocation_request ar;
    4095    11149522 :         ext4_lblk_t cluster_offset;
    4096             : 
    4097    11149522 :         ext_debug(inode, "blocks %u/%u requested\n", map->m_lblk, map->m_len);
    4098    11149522 :         trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
    4099             : 
    4100             :         /* find extent for this block */
    4101    11139635 :         path = ext4_find_extent(inode, map->m_lblk, NULL, 0);
    4102    11140373 :         if (IS_ERR(path)) {
    4103           1 :                 err = PTR_ERR(path);
    4104           1 :                 path = NULL;
    4105           1 :                 goto out;
    4106             :         }
    4107             : 
    4108    11140372 :         depth = ext_depth(inode);
    4109             : 
    4110             :         /*
    4111             :          * consistent leaf must not be empty;
    4112             :          * this situation is possible, though, _during_ tree modification;
    4113             :          * this is why assert can't be put in ext4_find_extent()
    4114             :          */
    4115    11140372 :         if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
    4116           0 :                 EXT4_ERROR_INODE(inode, "bad extent address "
    4117             :                                  "lblock: %lu, depth: %d pblock %lld",
    4118             :                                  (unsigned long) map->m_lblk, depth,
    4119             :                                  path[depth].p_block);
    4120           0 :                 err = -EFSCORRUPTED;
    4121           0 :                 goto out;
    4122             :         }
    4123             : 
    4124    11140372 :         ex = path[depth].p_ext;
    4125    11140372 :         if (ex) {
    4126     8570753 :                 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
    4127     8570753 :                 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
    4128     8570753 :                 unsigned short ee_len;
    4129             : 
    4130             : 
    4131             :                 /*
    4132             :                  * unwritten extents are treated as holes, except that
    4133             :                  * we split out initialized portions during a write.
    4134             :                  */
    4135     8570753 :                 ee_len = ext4_ext_get_actual_len(ex);
    4136             : 
    4137     8570753 :                 trace_ext4_ext_show_extent(inode, ee_block, ee_start, ee_len);
    4138             : 
    4139             :                 /* if found extent covers block, simply return it */
    4140     8570923 :                 if (in_range(map->m_lblk, ee_block, ee_len)) {
    4141     3958958 :                         newblock = map->m_lblk - ee_block + ee_start;
    4142             :                         /* number of remaining blocks in the extent */
    4143     3958958 :                         allocated = ee_len - (map->m_lblk - ee_block);
    4144     3958958 :                         ext_debug(inode, "%u fit into %u:%d -> %llu\n",
    4145             :                                   map->m_lblk, ee_block, ee_len, newblock);
    4146             : 
    4147             :                         /*
    4148             :                          * If the extent is initialized check whether the
    4149             :                          * caller wants to convert it to unwritten.
    4150             :                          */
    4151     3958958 :                         if ((!ext4_ext_is_unwritten(ex)) &&
    4152      955320 :                             (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
    4153      122030 :                                 err = convert_initialized_extent(handle,
    4154             :                                         inode, map, &path, &allocated);
    4155      122030 :                                 goto out;
    4156     3836928 :                         } else if (!ext4_ext_is_unwritten(ex)) {
    4157      833290 :                                 map->m_flags |= EXT4_MAP_MAPPED;
    4158      833290 :                                 map->m_pblk = newblock;
    4159      833290 :                                 if (allocated > map->m_len)
    4160      192882 :                                         allocated = map->m_len;
    4161      833290 :                                 map->m_len = allocated;
    4162      833290 :                                 ext4_ext_show_leaf(inode, path);
    4163      833290 :                                 goto out;
    4164             :                         }
    4165             : 
    4166     3003638 :                         ret = ext4_ext_handle_unwritten_extents(
    4167             :                                 handle, inode, map, &path, flags,
    4168             :                                 allocated, newblock);
    4169     3003626 :                         if (ret < 0)
    4170           0 :                                 err = ret;
    4171             :                         else
    4172     3003626 :                                 allocated = ret;
    4173     3003626 :                         goto out;
    4174             :                 }
    4175             :         }
    4176             : 
    4177             :         /*
    4178             :          * requested block isn't allocated yet;
    4179             :          * we can't create blocks if the create flag is zero
    4180             :          */
    4181     7181584 :         if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
    4182     3366430 :                 ext4_lblk_t hole_start, hole_len;
    4183             : 
    4184     3366430 :                 hole_start = map->m_lblk;
    4185     3366430 :                 hole_len = ext4_ext_determine_hole(inode, path, &hole_start);
    4186             :                 /*
    4187             :                  * put the just-found gap into the cache to speed up
    4188             :                  * subsequent requests
    4189             :                  */
    4190     3364038 :                 ext4_ext_put_gap_in_cache(inode, hole_start, hole_len);
    4191             : 
    4192             :                 /* Update hole_len to reflect hole size after map->m_lblk */
    4193     3372708 :                 if (hole_start != map->m_lblk)
    4194     1386929 :                         hole_len -= map->m_lblk - hole_start;
    4195     3372708 :                 map->m_pblk = 0;
    4196     3372708 :                 map->m_len = min_t(unsigned int, map->m_len, hole_len);
    4197             : 
    4198     3372708 :                 goto out;
    4199             :         }
    4200             : 
    4201             :         /*
    4202             :          * Okay, we need to do block allocation.
    4203             :          */
    4204     3815154 :         newex.ee_block = cpu_to_le32(map->m_lblk);
    4205     3815154 :         cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
    4206             : 
    4207             :         /*
    4208             :          * If we are doing bigalloc, check to see if the extent returned
    4209             :          * by ext4_find_extent() implies a cluster we can use.
    4210             :          */
    4211     3815155 :         if (cluster_offset && ex &&
    4212           1 :             get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
    4213           1 :                 ar.len = allocated = map->m_len;
    4214           1 :                 newblock = map->m_pblk;
    4215           1 :                 goto got_allocated_blocks;
    4216             :         }
    4217             : 
    4218             :         /* find neighbour allocated blocks */
    4219     3815153 :         ar.lleft = map->m_lblk;
    4220     3815153 :         err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
    4221     3814296 :         if (err)
    4222           0 :                 goto out;
    4223     3814296 :         ar.lright = map->m_lblk;
    4224     3814296 :         err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2);
    4225     3814638 :         if (err < 0)
    4226           0 :                 goto out;
    4227             : 
    4228             :         /* Check if the extent after searching to the right implies a
    4229             :          * cluster we can use. */
    4230     3814638 :         if ((sbi->s_cluster_ratio > 1) && err &&
    4231           0 :             get_implied_cluster_alloc(inode->i_sb, map, &ex2, path)) {
    4232           0 :                 ar.len = allocated = map->m_len;
    4233           0 :                 newblock = map->m_pblk;
    4234           0 :                 goto got_allocated_blocks;
    4235             :         }
    4236             : 
    4237             :         /*
    4238             :          * See if request is beyond maximum number of blocks we can have in
    4239             :          * a single extent. For an initialized extent this limit is
    4240             :          * EXT_INIT_MAX_LEN and for an unwritten extent this limit is
    4241             :          * EXT_UNWRITTEN_MAX_LEN.
    4242             :          */
    4243     3814638 :         if (map->m_len > EXT_INIT_MAX_LEN &&
    4244      575046 :             !(flags & EXT4_GET_BLOCKS_UNWRIT_EXT))
    4245           0 :                 map->m_len = EXT_INIT_MAX_LEN;
    4246     3814638 :         else if (map->m_len > EXT_UNWRITTEN_MAX_LEN &&
    4247      575084 :                  (flags & EXT4_GET_BLOCKS_UNWRIT_EXT))
    4248      575084 :                 map->m_len = EXT_UNWRITTEN_MAX_LEN;
    4249             : 
    4250             :         /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
    4251     3814638 :         newex.ee_len = cpu_to_le16(map->m_len);
    4252     3814638 :         err = ext4_ext_check_overlap(sbi, inode, &newex, path);
    4253     3813280 :         if (err)
    4254           0 :                 allocated = ext4_ext_get_actual_len(&newex);
    4255             :         else
    4256     3813280 :                 allocated = map->m_len;
    4257             : 
    4258             :         /* allocate new block */
    4259     3813280 :         ar.inode = inode;
    4260     3813280 :         ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
    4261     3813346 :         ar.logical = map->m_lblk;
    4262             :         /*
    4263             :          * We calculate the offset from the beginning of the cluster
    4264             :          * for the logical block number, since when we allocate a
    4265             :          * physical cluster, the physical block should start at the
    4266             :          * same offset from the beginning of the cluster.  This is
    4267             :          * needed so that future calls to get_implied_cluster_alloc()
    4268             :          * work correctly.
    4269             :          */
    4270     3813346 :         offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
    4271     3813346 :         ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
    4272     3813346 :         ar.goal -= offset;
    4273     3813346 :         ar.logical -= offset;
    4274     3813346 :         if (S_ISREG(inode->i_mode))
    4275     3388580 :                 ar.flags = EXT4_MB_HINT_DATA;
    4276             :         else
    4277             :                 /* disable in-core preallocation for non-regular files */
    4278      424766 :                 ar.flags = 0;
    4279     3813346 :         if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
    4280      532280 :                 ar.flags |= EXT4_MB_HINT_NOPREALLOC;
    4281     3813346 :         if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
    4282      890193 :                 ar.flags |= EXT4_MB_DELALLOC_RESERVED;
    4283     3813346 :         if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
    4284      892138 :                 ar.flags |= EXT4_MB_USE_RESERVED;
    4285     3813346 :         newblock = ext4_mb_new_blocks(handle, &ar, &err);
    4286     3816159 :         if (!newblock)
    4287      723561 :                 goto out;
    4288     3092598 :         allocated_clusters = ar.len;
    4289     3092598 :         ar.len = EXT4_C2B(sbi, ar.len) - offset;
    4290     3092598 :         ext_debug(inode, "allocate new block: goal %llu, found %llu/%u, requested %u\n",
    4291             :                   ar.goal, newblock, ar.len, allocated);
    4292     3092598 :         if (ar.len > allocated)
    4293           1 :                 ar.len = allocated;
    4294             : 
    4295     3092597 : got_allocated_blocks:
    4296             :         /* try to insert new extent into found leaf and return */
    4297     3092599 :         pblk = newblock + offset;
    4298     3092599 :         ext4_ext_store_pblock(&newex, pblk);
    4299     3092599 :         newex.ee_len = cpu_to_le16(ar.len);
    4300             :         /* Mark unwritten */
    4301     3092599 :         if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {
    4302     2444744 :                 ext4_ext_mark_unwritten(&newex);
    4303     2444744 :                 map->m_flags |= EXT4_MAP_UNWRITTEN;
    4304             :         }
    4305             : 
    4306     3092599 :         err = ext4_ext_insert_extent(handle, inode, &path, &newex, flags);
    4307     3092573 :         if (err) {
    4308         295 :                 if (allocated_clusters) {
    4309         295 :                         int fb_flags = 0;
    4310             : 
    4311             :                         /*
    4312             :                          * Free the data blocks we just allocated.
    4313             :                          * It is not a good idea to call discard here directly,
    4314             :                          * but otherwise we'd need to call it on every free().
    4315             :                          */
    4316         295 :                         ext4_discard_preallocations(inode, 0);
    4317         295 :                         if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
    4318           4 :                                 fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE;
    4319         590 :                         ext4_free_blocks(handle, inode, NULL, newblock,
    4320         295 :                                          EXT4_C2B(sbi, allocated_clusters),
    4321             :                                          fb_flags);
    4322             :                 }
    4323         295 :                 goto out;
    4324             :         }
    4325             : 
    4326             :         /*
    4327             :          * Reduce the reserved cluster count to reflect successful deferred
    4328             :          * allocation of delayed allocated clusters or direct allocation of
    4329             :          * clusters discovered to be delayed allocated.  Once allocated, a
    4330             :          * cluster is not included in the reserved count.
    4331             :          */
    4332     3092278 :         if (test_opt(inode->i_sb, DELALLOC) && allocated_clusters) {
    4333     3087554 :                 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
    4334             :                         /*
    4335             :                          * When allocating delayed allocated clusters, simply
    4336             :                          * reduce the reserved cluster count and claim quota
    4337             :                          */
    4338      890086 :                         ext4_da_update_reserve_space(inode, allocated_clusters,
    4339             :                                                         1);
    4340             :                 } else {
    4341     2197468 :                         ext4_lblk_t lblk, len;
    4342     2197468 :                         unsigned int n;
    4343             : 
    4344             :                         /*
    4345             :                          * When allocating non-delayed allocated clusters
    4346             :                          * (from fallocate, filemap, DIO, or clusters
    4347             :                          * allocated when delalloc has been disabled by
    4348             :                          * ext4_nonda_switch), reduce the reserved cluster
    4349             :                          * count by the number of allocated clusters that
    4350             :                          * have previously been delayed allocated.  Quota
    4351             :                          * has been claimed by ext4_mb_new_blocks() above,
    4352             :                          * so release the quota reservations made for any
    4353             :                          * previously delayed allocated clusters.
    4354             :                          */
    4355     2197468 :                         lblk = EXT4_LBLK_CMASK(sbi, map->m_lblk);
    4356     2197468 :                         len = allocated_clusters << sbi->s_cluster_bits;
    4357     2197468 :                         n = ext4_es_delayed_clu(inode, lblk, len);
    4358     2197716 :                         if (n > 0)
    4359       37948 :                                 ext4_da_update_reserve_space(inode, (int) n, 0);
    4360             :                 }
    4361             :         }
    4362             : 
    4363             :         /*
    4364             :          * Cache the extent and update transaction to commit on fdatasync only
    4365             :          * when it is _not_ an unwritten extent.
    4366             :          */
    4367     3092551 :         if ((flags & EXT4_GET_BLOCKS_UNWRIT_EXT) == 0)
    4368      647561 :                 ext4_update_inode_fsync_trans(handle, inode, 1);
    4369             :         else
    4370     2444990 :                 ext4_update_inode_fsync_trans(handle, inode, 0);
    4371             : 
    4372     3092221 :         map->m_flags |= (EXT4_MAP_NEW | EXT4_MAP_MAPPED);
    4373     3092221 :         map->m_pblk = pblk;
    4374     3092221 :         map->m_len = ar.len;
    4375     3092221 :         allocated = map->m_len;
    4376    11147732 :         ext4_ext_show_leaf(inode, path);
    4377    11147732 : out:
    4378    11147732 :         ext4_free_ext_path(path);
    4379             : 
    4380    11147978 :         trace_ext4_ext_map_blocks_exit(inode, flags, map,
    4381    11147978 :                                        err ? err : allocated);
    4382    11141106 :         return err ? err : allocated;
    4383             : }
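The header comment above describes a three-way return convention: a positive count of mapped blocks, zero for a hole, or a negative error. A hypothetical caller, written against the generic ext4_map_blocks() wrapper used elsewhere in this file, might consume it as below; lookup_range() and the pr_debug() messages are illustrative, not existing kernel code.

/* Hypothetical helper: look up 'want' blocks at 'lblk' without allocating. */
static int lookup_range(handle_t *handle, struct inode *inode,
                        ext4_lblk_t lblk, unsigned int want)
{
        struct ext4_map_blocks map = { .m_lblk = lblk, .m_len = want };
        int ret = ext4_map_blocks(handle, inode, &map, 0);   /* no CREATE */

        if (ret < 0)
                return ret;                     /* error case               */
        if (ret == 0)                           /* hole, m_len trimmed      */
                pr_debug("hole at %u, %u blocks\n", map.m_lblk, map.m_len);
        else                                    /* ret blocks mapped        */
                pr_debug("%d blocks at %llu%s\n", ret,
                         (unsigned long long)map.m_pblk,
                         (map.m_flags & EXT4_MAP_UNWRITTEN) ? " (unwritten)" : "");
        return ret;
}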
    4384             : 
    4385      594972 : int ext4_ext_truncate(handle_t *handle, struct inode *inode)
    4386             : {
    4387      594972 :         struct super_block *sb = inode->i_sb;
    4388      594972 :         ext4_lblk_t last_block;
    4389      594972 :         int err = 0;
    4390             : 
    4391             :         /*
    4392             :          * TODO: optimization is possible here.
    4393             :          * Probably we need not scan at all,
    4394             :          * because page truncation is enough.
    4395             :          */
    4396             : 
    4397             :         /* we have to know where to truncate from in the crash case */
    4398      594972 :         EXT4_I(inode)->i_disksize = inode->i_size;
    4399      594972 :         err = ext4_mark_inode_dirty(handle, inode);
    4400      595088 :         if (err)
    4401             :                 return err;
    4402             : 
    4403     1190176 :         last_block = (inode->i_size + sb->s_blocksize - 1)
    4404      595088 :                         >> EXT4_BLOCK_SIZE_BITS(sb);
    4405      595088 :         ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block);
    4406             : 
    4407      594891 : retry_remove_space:
    4408      594891 :         err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
    4409      594854 :         if (err == -ENOMEM) {
    4410           0 :                 memalloc_retry_wait(GFP_ATOMIC);
    4411           0 :                 goto retry_remove_space;
    4412             :         }
    4413             :         return err;
    4414             : }
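The last_block computation above is a round-up division of i_size by the block size, after which everything from that logical block up to EXT_MAX_BLOCKS is dropped from the extent status tree and the extent tree. A worked example with assumed values (4096-byte blocks, a 10000-byte file); the helper is hypothetical:

static ext4_lblk_t truncate_start_block(void)
{
        loff_t i_size = 10000;                  /* assumed example size      */
        unsigned int blkbits = 12;              /* 4096-byte blocks          */

        /* (10000 + 4095) >> 12 = 3: blocks 0..2 survive, and logical
         * blocks 3 .. EXT_MAX_BLOCKS - 1 are removed. */
        return (i_size + (1 << blkbits) - 1) >> blkbits;
}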
    4415             : 
    4416      916771 : static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
    4417             :                                   ext4_lblk_t len, loff_t new_size,
    4418             :                                   int flags)
    4419             : {
    4420      916771 :         struct inode *inode = file_inode(file);
    4421      916771 :         handle_t *handle;
    4422      916771 :         int ret = 0, ret2 = 0, ret3 = 0;
    4423      916771 :         int retries = 0;
    4424      916771 :         int depth = 0;
    4425      916771 :         struct ext4_map_blocks map;
    4426      916771 :         unsigned int credits;
    4427      916771 :         loff_t epos;
    4428             : 
    4429      916771 :         BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
    4430      916771 :         map.m_lblk = offset;
    4431      916771 :         map.m_len = len;
    4432             :         /*
    4433             :          * Don't normalize the request if it can fit in one extent so
    4434             :          * that it doesn't get unnecessarily split into multiple
    4435             :          * extents.
    4436             :          */
    4437      916771 :         if (len <= EXT_UNWRITTEN_MAX_LEN)
    4438      916602 :                 flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
    4439             : 
    4440             :         /*
    4441             :          * credits to insert 1 extent into extent tree
    4442             :          */
    4443      916771 :         credits = ext4_chunk_trans_blocks(inode, len);
    4444      916769 :         depth = ext_depth(inode);
    4445             : 
    4446             : retry:
    4447     3160382 :         while (len) {
    4448             :                 /*
    4449             :                  * Recalculate credits when extent tree depth changes.
    4450             :                  */
    4451     2256100 :                 if (depth != ext_depth(inode)) {
    4452        8383 :                         credits = ext4_chunk_trans_blocks(inode, len);
    4453        8382 :                         depth = ext_depth(inode);
    4454             :                 }
    4455             : 
    4456     2256099 :                 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
    4457             :                                             credits);
    4458     2256097 :                 if (IS_ERR(handle)) {
    4459           0 :                         ret = PTR_ERR(handle);
    4460           0 :                         break;
    4461             :                 }
    4462     2256097 :                 ret = ext4_map_blocks(handle, inode, &map, flags);
    4463     2256093 :                 if (ret <= 0) {
    4464       28835 :                         ext4_debug("inode #%lu: block %u: len %u: "
    4465             :                                    "ext4_ext_map_blocks returned %d",
    4466             :                                    inode->i_ino, map.m_lblk,
    4467             :                                    map.m_len, ret);
    4468       28835 :                         ext4_mark_inode_dirty(handle, inode);
    4469       28834 :                         ext4_journal_stop(handle);
    4470       28834 :                         break;
    4471             :                 }
    4472             :                 /*
    4473             :                  * allow a full retry cycle for any remaining allocations
    4474             :                  */
    4475     2227258 :                 retries = 0;
    4476     2227258 :                 map.m_lblk += ret;
    4477     2227258 :                 map.m_len = len = len - ret;
    4478     2227258 :                 epos = (loff_t)map.m_lblk << inode->i_blkbits;
    4479     2227258 :                 inode->i_ctime = current_time(inode);
    4480     2227251 :                 if (new_size) {
    4481     1090102 :                         if (epos > new_size)
    4482             :                                 epos = new_size;
    4483     1090102 :                         if (ext4_update_inode_size(inode, epos) & 0x1)
    4484      593104 :                                 inode->i_mtime = inode->i_ctime;
    4485             :                 }
    4486     2227249 :                 ret2 = ext4_mark_inode_dirty(handle, inode);
    4487     2227282 :                 ext4_update_inode_fsync_trans(handle, inode, 1);
    4488     2227282 :                 ret3 = ext4_journal_stop(handle);
    4489     2227277 :                 ret2 = ret3 ? ret3 : ret2;
    4490     2227277 :                 if (unlikely(ret2))
    4491             :                         break;
    4492             :         }
    4493      933115 :         if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
    4494       16336 :                 goto retry;
    4495             : 
    4496      916774 :         return ret > 0 ? ret2 : ret;
    4497             : }
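The allocation loop above hands the range to ext4_map_blocks() in chunks: each successful call maps 'ret' blocks, so the request is advanced by that amount and the on-disk EOF candidate (epos) is recomputed from the next unmapped block and clamped to new_size before the inode is marked dirty. A self-contained sketch of just that bookkeeping, with assumed example numbers rather than data from this report (next_eof_candidate() is hypothetical):

static loff_t next_eof_candidate(unsigned int next_lblk, unsigned int blkbits,
                                 loff_t new_size)
{
        loff_t epos = (loff_t)next_lblk << blkbits;     /* bytes covered so far */

        if (new_size && epos > new_size)
                epos = new_size;                        /* never exceed new_size */
        return epos;    /* e.g. next_lblk = 5, blkbits = 12, new_size = 20000
                         * gives 20480 clamped to 20000 */
}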
    4498             : 
    4499             : static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len);
    4500             : 
    4501             : static int ext4_insert_range(struct file *file, loff_t offset, loff_t len);
    4502             : 
    4503      203911 : static long ext4_zero_range(struct file *file, loff_t offset,
    4504             :                             loff_t len, int mode)
    4505             : {
    4506      203911 :         struct inode *inode = file_inode(file);
    4507      203911 :         struct address_space *mapping = file->f_mapping;
    4508      203911 :         handle_t *handle = NULL;
    4509      203911 :         unsigned int max_blocks;
    4510      203911 :         loff_t new_size = 0;
    4511      203911 :         int ret = 0;
    4512      203911 :         int flags;
    4513      203911 :         int credits;
    4514      203911 :         int partial_begin, partial_end;
    4515      203911 :         loff_t start, end;
    4516      203911 :         ext4_lblk_t lblk;
    4517      203911 :         unsigned int blkbits = inode->i_blkbits;
    4518             : 
    4519      203911 :         trace_ext4_zero_range(inode, offset, len, mode);
    4520             : 
    4521             :          * Round up the offset. This is not fallocate; we need to zero out
    4522             :          * blocks, so convert the interior block-aligned part of the range to
    4523             :          * unwritten and possibly manually zero out the unaligned parts of the
    4524             :          * range.
    4525             :          * range.
    4526             :          */
    4527      203911 :         start = round_up(offset, 1 << blkbits);
    4528      203911 :         end = round_down((offset + len), 1 << blkbits);
    4529             : 
    4530      203911 :         if (start < offset || end > offset + len)
    4531             :                 return -EINVAL;
    4532      203911 :         partial_begin = offset & ((1 << blkbits) - 1);
    4533      203911 :         partial_end = (offset + len) & ((1 << blkbits) - 1);
    4534             : 
    4535      203911 :         lblk = start >> blkbits;
    4536      203911 :         max_blocks = (end >> blkbits);
    4537      203911 :         if (max_blocks < lblk)
    4538             :                 max_blocks = 0;
    4539             :         else
    4540      199305 :                 max_blocks -= lblk;
    4541             : 
    4542      203911 :         inode_lock(inode);
    4543             : 
    4544             :         /*
    4545             :          * Indirect files do not support unwritten extents
    4546             :          */
    4547      203912 :         if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
    4548          14 :                 ret = -EOPNOTSUPP;
    4549          14 :                 goto out_mutex;
    4550             :         }
    4551             : 
    4552      203898 :         if (!(mode & FALLOC_FL_KEEP_SIZE) &&
    4553      100295 :             (offset + len > inode->i_size ||
    4554       66505 :              offset + len > EXT4_I(inode)->i_disksize)) {
    4555       36485 :                 new_size = offset + len;
    4556       36485 :                 ret = inode_newsize_ok(inode, new_size);
    4557       36484 :                 if (ret)
    4558           0 :                         goto out_mutex;
    4559             :         }
    4560             : 
    4561      203897 :         flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
    4562             : 
    4563             :         /* Wait for all existing DIO workers; newcomers will block on i_rwsem */
    4564      203897 :         inode_dio_wait(inode);
    4565             : 
    4566      203895 :         ret = file_modified(file);
    4567      203898 :         if (ret)
    4568           0 :                 goto out_mutex;
    4569             : 
    4570             :         /* Preallocate the range including the unaligned edges */
    4571      203898 :         if (partial_begin || partial_end) {
    4572      200253 :                 ret = ext4_alloc_file_blocks(file,
    4573      200253 :                                 round_down(offset, 1 << blkbits) >> blkbits,
    4574      200253 :                                 (round_up((offset + len), 1 << blkbits) -
    4575      200253 :                                  round_down(offset, 1 << blkbits)) >> blkbits,
    4576             :                                 new_size, flags);
    4577      200254 :                 if (ret)
    4578        2951 :                         goto out_mutex;
    4579             : 
    4580             :         }
    4581             : 
    4582             :         /* Zero range excluding the unaligned edges */
    4583      200948 :         if (max_blocks > 0) {
    4584      186492 :                 flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
    4585             :                           EXT4_EX_NOCACHE);
    4586             : 
    4587             :                 /*
    4588             :                  * Prevent page faults from reinstantiating pages we have
    4589             :                  * released from page cache.
    4590             :                  */
    4591      186492 :                 filemap_invalidate_lock(mapping);
    4592             : 
    4593      186491 :                 ret = ext4_break_layouts(inode);
    4594      186488 :                 if (ret) {
    4595           0 :                         filemap_invalidate_unlock(mapping);
    4596           0 :                         goto out_mutex;
    4597             :                 }
    4598             : 
    4599      186488 :                 ret = ext4_update_disksize_before_punch(inode, offset, len);
    4600      186488 :                 if (ret) {
    4601           0 :                         filemap_invalidate_unlock(mapping);
    4602           0 :                         goto out_mutex;
    4603             :                 }
    4604             : 
    4605             :                 /*
    4606             :                  * For journalled data we need to write (and checkpoint) pages
    4607             :                  * before discarding the page cache to avoid inconsistent data
    4608             :                  * on disk in case of a crash before the zeroing trans is committed.
    4609             :                  */
    4610      186488 :                 if (ext4_should_journal_data(inode)) {
    4611           0 :                         ret = filemap_write_and_wait_range(mapping, start, end);
    4612           0 :                         if (ret) {
    4613           0 :                                 filemap_invalidate_unlock(mapping);
    4614           0 :                                 goto out_mutex;
    4615             :                         }
    4616             :                 }
    4617             : 
    4618             :                 /* Now release the pages and zero the block-aligned part of the pages */
    4619      186487 :                 truncate_pagecache_range(inode, start, end - 1);
    4620      186486 :                 inode->i_mtime = inode->i_ctime = current_time(inode);
    4621             : 
    4622      186487 :                 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
    4623             :                                              flags);
    4624      186492 :                 filemap_invalidate_unlock(mapping);
    4625      186492 :                 if (ret)
    4626           0 :                         goto out_mutex;
    4627             :         }
    4628      200948 :         if (!partial_begin && !partial_end)
    4629        3645 :                 goto out_mutex;
    4630             : 
    4631             :         /*
     4632             :          * In the worst case we have to write out two nonadjacent unwritten
     4633             :          * blocks and update the inode.
    4634             :          */
    4635      197303 :         credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1;
    4636      197303 :         if (ext4_should_journal_data(inode))
    4637           0 :                 credits += 2;
    4638      197303 :         handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
    4639      197302 :         if (IS_ERR(handle)) {
    4640           0 :                 ret = PTR_ERR(handle);
    4641           0 :                 ext4_std_error(inode->i_sb, ret);
    4642           0 :                 goto out_mutex;
    4643             :         }
    4644             : 
    4645      197302 :         inode->i_mtime = inode->i_ctime = current_time(inode);
    4646      197302 :         if (new_size)
    4647       35308 :                 ext4_update_inode_size(inode, new_size);
    4648      197302 :         ret = ext4_mark_inode_dirty(handle, inode);
    4649      197303 :         if (unlikely(ret))
    4650           0 :                 goto out_handle;
    4651             :         /* Zero out partial block at the edges of the range */
    4652      197303 :         ret = ext4_zero_partial_blocks(handle, inode, offset, len);
    4653      197303 :         if (ret >= 0)
    4654      197303 :                 ext4_update_inode_fsync_trans(handle, inode, 1);
    4655             : 
    4656      197302 :         if (file->f_flags & O_SYNC)
    4657           0 :                 ext4_handle_sync(handle);
    4658             : 
    4659      197302 : out_handle:
    4660      197302 :         ext4_journal_stop(handle);
    4661      203912 : out_mutex:
    4662      203912 :         inode_unlock(inode);
    4663      203912 :         return ret;
    4664             : }
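
The zero-range path above only converts the block-aligned middle of the request; the blocks covering any unaligned edges are preallocated first and their partial bytes are zeroed at the end via ext4_zero_partial_blocks(). A minimal stand-alone sketch of that edge arithmetic, with illustrative RND_DOWN/RND_UP macros standing in for the kernel's round_down()/round_up() and an assumed 4 KiB block size:

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the kernel's round_down()/round_up() macros. */
#define RND_DOWN(x, a)  ((x) & ~((uint64_t)(a) - 1))
#define RND_UP(x, a)    RND_DOWN((x) + (a) - 1, (a))

int main(void)
{
        unsigned int blkbits = 12;              /* assume 4 KiB blocks */
        uint64_t offset = 5000, len = 20000;    /* hypothetical range */
        uint64_t bsize = 1ULL << blkbits;

        /* Edges that are not block aligned must be zeroed byte-wise. */
        int partial_begin = offset & (bsize - 1);
        int partial_end   = (offset + len) & (bsize - 1);

        /* Blocks covering the whole request, including unaligned edges. */
        uint64_t first = RND_DOWN(offset, bsize) >> blkbits;
        uint64_t count = (RND_UP(offset + len, bsize) -
                          RND_DOWN(offset, bsize)) >> blkbits;

        printf("partial_begin=%d partial_end=%d first_blk=%llu nr_blks=%llu\n",
               partial_begin, partial_end,
               (unsigned long long)first, (unsigned long long)count);
        return 0;
}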
    4665             : 
    4666             : /*
     4667             :  * Preallocate space for a file. This implements ext4's fallocate file
     4668             :  * operation, which gets called from the sys_fallocate system call.
     4669             :  * For block-mapped files, posix_fallocate should fall back to the method
     4670             :  * of writing zeroes to the required new blocks (the same behavior that is
     4671             :  * expected for file systems which do not support the fallocate() system call).
    4672             :  */
    4673     1349749 : long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
    4674             : {
    4675     1349749 :         struct inode *inode = file_inode(file);
    4676     1349749 :         loff_t new_size = 0;
    4677     1349749 :         unsigned int max_blocks;
    4678     1349749 :         int ret = 0;
    4679     1349749 :         int flags;
    4680     1349749 :         ext4_lblk_t lblk;
    4681     1349749 :         unsigned int blkbits = inode->i_blkbits;
    4682             : 
    4683             :         /*
    4684             :          * Encrypted inodes can't handle collapse range or insert
    4685             :          * range since we would need to re-encrypt blocks with a
    4686             :          * different IV or XTS tweak (which are based on the logical
    4687             :          * block number).
    4688             :          */
    4689     1349749 :         if (IS_ENCRYPTED(inode) &&
    4690           0 :             (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
    4691             :                 return -EOPNOTSUPP;
    4692             : 
    4693             :         /* Return error if mode is not supported */
    4694     1349749 :         if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
    4695             :                      FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
    4696             :                      FALLOC_FL_INSERT_RANGE))
    4697             :                 return -EOPNOTSUPP;
    4698             : 
    4699     1349749 :         inode_lock(inode);
    4700     1349767 :         ret = ext4_convert_inline_data(inode);
    4701     1349776 :         inode_unlock(inode);
    4702     1349783 :         if (ret)
    4703           0 :                 goto exit;
    4704             : 
    4705     1349783 :         if (mode & FALLOC_FL_PUNCH_HOLE) {
    4706      266121 :                 ret = ext4_punch_hole(file, offset, len);
    4707      266125 :                 goto exit;
    4708             :         }
    4709             : 
    4710     1083662 :         if (mode & FALLOC_FL_COLLAPSE_RANGE) {
    4711      202826 :                 ret = ext4_collapse_range(file, offset, len);
    4712      202827 :                 goto exit;
    4713             :         }
    4714             : 
    4715      880836 :         if (mode & FALLOC_FL_INSERT_RANGE) {
    4716      146872 :                 ret = ext4_insert_range(file, offset, len);
    4717      146869 :                 goto exit;
    4718             :         }
    4719             : 
    4720      733964 :         if (mode & FALLOC_FL_ZERO_RANGE) {
    4721      203913 :                 ret = ext4_zero_range(file, offset, len, mode);
    4722      203912 :                 goto exit;
    4723             :         }
    4724      530051 :         trace_ext4_fallocate_enter(inode, offset, len, mode);
    4725      530039 :         lblk = offset >> blkbits;
    4726             : 
    4727      530039 :         max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
    4728      530039 :         flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
    4729             : 
    4730      530039 :         inode_lock(inode);
    4731             : 
    4732             :         /*
     4733             :          * We only support preallocation for extent-based files
    4734             :          */
    4735      530060 :         if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
    4736          28 :                 ret = -EOPNOTSUPP;
    4737          28 :                 goto out;
    4738             :         }
    4739             : 
    4740      530032 :         if (!(mode & FALLOC_FL_KEEP_SIZE) &&
    4741      416267 :             (offset + len > inode->i_size ||
    4742       72879 :              offset + len > EXT4_I(inode)->i_disksize)) {
    4743      345979 :                 new_size = offset + len;
    4744      345979 :                 ret = inode_newsize_ok(inode, new_size);
    4745      345979 :                 if (ret)
    4746           1 :                         goto out;
    4747             :         }
    4748             : 
     4749             :         /* Wait for all existing dio workers; newcomers will block on i_rwsem */
    4750      530031 :         inode_dio_wait(inode);
    4751             : 
    4752      530026 :         ret = file_modified(file);
    4753      530031 :         if (ret)
    4754           0 :                 goto out;
    4755             : 
    4756      530031 :         ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
    4757      530030 :         if (ret)
    4758        9539 :                 goto out;
    4759             : 
    4760      520491 :         if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
    4761           0 :                 ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
    4762           0 :                                         EXT4_I(inode)->i_sync_tid);
    4763             :         }
    4764      520491 : out:
    4765      530059 :         inode_unlock(inode);
    4766      530057 :         trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
    4767     1349787 : exit:
    4768     1349787 :         return ret;
    4769             : }
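
ext4_fallocate() is reached through the fallocate(2) system call, and the mode bits tested above select which helper runs. A minimal userspace sketch exercising the supported modes; the file path and sizes are placeholders, and collapse/insert additionally require cluster-aligned offsets on an ext4 filesystem:

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/tmp/fallocate-demo", O_RDWR | O_CREAT, 0644);

        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* Plain preallocation: unwritten extents, i_size grows to 1 MiB. */
        if (fallocate(fd, 0, 0, 1 << 20))
                perror("preallocate");

        /* Zero a sub-range in place (ext4_zero_range() above). */
        if (fallocate(fd, FALLOC_FL_ZERO_RANGE, 4096, 8192))
                perror("zero range");

        /* Punch a hole; PUNCH_HOLE must be combined with KEEP_SIZE. */
        if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                      16384, 4096))
                perror("punch hole");

        /* Collapse and insert operate on cluster-aligned ranges only. */
        if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 4096, 4096))
                perror("collapse range");
        if (fallocate(fd, FALLOC_FL_INSERT_RANGE, 4096, 4096))
                perror("insert range");

        close(fd);
        return 0;
}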
    4770             : 
    4771             : /*
     4772             :  * This function converts a range of blocks to written extents.
     4773             :  * The caller of this function will pass the start offset and the size.
     4774             :  * All unwritten extents within this range will be converted to
     4775             :  * written extents.
     4776             :  *
     4777             :  * This function is called from the direct IO end-io callback
     4778             :  * function to convert the fallocated extents after IO is completed.
     4779             :  * Returns 0 on success.
    4780             :  */
    4781     1555134 : int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
    4782             :                                    loff_t offset, ssize_t len)
    4783             : {
    4784     1555134 :         unsigned int max_blocks;
    4785     1555134 :         int ret = 0, ret2 = 0, ret3 = 0;
    4786     1555134 :         struct ext4_map_blocks map;
    4787     1555134 :         unsigned int blkbits = inode->i_blkbits;
    4788     1555134 :         unsigned int credits = 0;
    4789             : 
    4790     1555134 :         map.m_lblk = offset >> blkbits;
    4791     1555134 :         max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
    4792             : 
    4793     1555134 :         if (!handle) {
    4794             :                 /*
    4795             :                  * credits to insert 1 extent into extent tree
    4796             :                  */
    4797      432545 :                 credits = ext4_chunk_trans_blocks(inode, max_blocks);
    4798             :         }
    4799     3298881 :         while (ret >= 0 && ret < max_blocks) {
    4800     1743753 :                 map.m_lblk += ret;
    4801     1743753 :                 map.m_len = (max_blocks -= ret);
    4802     1743753 :                 if (credits) {
    4803      621160 :                         handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
    4804             :                                                     credits);
    4805      621156 :                         if (IS_ERR(handle)) {
    4806           1 :                                 ret = PTR_ERR(handle);
    4807           1 :                                 break;
    4808             :                         }
    4809             :                 }
    4810     1743748 :                 ret = ext4_map_blocks(handle, inode, &map,
    4811             :                                       EXT4_GET_BLOCKS_IO_CONVERT_EXT);
    4812     1743721 :                 if (ret <= 0)
    4813           0 :                         ext4_warning(inode->i_sb,
    4814             :                                      "inode #%lu: block %u: len %u: "
    4815             :                                      "ext4_ext_map_blocks returned %d",
    4816             :                                      inode->i_ino, map.m_lblk,
    4817             :                                      map.m_len, ret);
    4818     1743721 :                 ret2 = ext4_mark_inode_dirty(handle, inode);
    4819     1743747 :                 if (credits) {
    4820      621160 :                         ret3 = ext4_journal_stop(handle);
    4821      621157 :                         if (unlikely(ret3))
    4822           0 :                                 ret2 = ret3;
    4823             :                 }
    4824             : 
    4825     1743744 :                 if (ret <= 0 || ret2)
    4826             :                         break;
    4827             :         }
    4828     1555129 :         return ret > 0 ? ret2 : ret;
    4829             : }
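
One common way to reach the conversion path above is a direct-IO write into space preallocated as unwritten extents; when the IO completes, the end-io callback converts the covered extents to written. A hedged userspace sketch, assuming 4 KiB alignment is acceptable for O_DIRECT on the underlying device and that the path is a placeholder:

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        void *buf;
        int fd = open("/tmp/dio-convert-demo", O_RDWR | O_CREAT | O_DIRECT, 0644);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* Preallocate unwritten extents without changing i_size. */
        if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20))
                perror("fallocate");

        /* O_DIRECT needs an aligned buffer; 4096 is assumed sufficient here. */
        if (posix_memalign(&buf, 4096, 4096))
                return 1;
        memset(buf, 0xab, 4096);

        /*
         * The direct write lands in preallocated space; when the IO completes,
         * the end-io path converts the covered extent from unwritten to written.
         */
        if (pwrite(fd, buf, 4096, 0) != 4096)
                perror("pwrite");

        free(buf);
        close(fd);
        return 0;
}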
    4830             : 
    4831     1122636 : int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end)
    4832             : {
    4833     1122636 :         int ret = 0, err = 0;
    4834     1122636 :         struct ext4_io_end_vec *io_end_vec;
    4835             : 
    4836             :         /*
     4837             :          * This is somewhat ugly but the idea is clear: when a transaction is
     4838             :          * reserved, everything goes into it. Otherwise we prefer to start
     4839             :          * several smaller transactions, converting each extent separately.
    4840             :          */
    4841     1122636 :         if (handle) {
    4842     1122615 :                 handle = ext4_journal_start_reserved(handle,
    4843             :                                                      EXT4_HT_EXT_CONVERT);
    4844     1122615 :                 if (IS_ERR(handle))
    4845          16 :                         return PTR_ERR(handle);
    4846             :         }
    4847             : 
    4848     2245240 :         list_for_each_entry(io_end_vec, &io_end->list_vec, list) {
    4849     1122620 :                 ret = ext4_convert_unwritten_extents(handle, io_end->inode,
    4850             :                                                      io_end_vec->offset,
    4851             :                                                      io_end_vec->size);
    4852     1122620 :                 if (ret)
    4853             :                         break;
    4854             :         }
    4855             : 
    4856     1122620 :         if (handle)
    4857     1122599 :                 err = ext4_journal_stop(handle);
    4858             : 
    4859     1122620 :         return ret < 0 ? ret : err;
    4860             : }
    4861             : 
    4862        3382 : static int ext4_iomap_xattr_fiemap(struct inode *inode, struct iomap *iomap)
    4863             : {
    4864        3382 :         __u64 physical = 0;
    4865        3382 :         __u64 length = 0;
    4866        3382 :         int blockbits = inode->i_sb->s_blocksize_bits;
    4867        3382 :         int error = 0;
    4868        3382 :         u16 iomap_type;
    4869             : 
    4870             :         /* in-inode? */
    4871        3382 :         if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
    4872         560 :                 struct ext4_iloc iloc;
    4873         560 :                 int offset;     /* offset of xattr in inode */
    4874             : 
    4875         560 :                 error = ext4_get_inode_loc(inode, &iloc);
    4876         560 :                 if (error)
    4877           0 :                         return error;
    4878         560 :                 physical = (__u64)iloc.bh->b_blocknr << blockbits;
    4879         560 :                 offset = EXT4_GOOD_OLD_INODE_SIZE +
    4880         560 :                                 EXT4_I(inode)->i_extra_isize;
    4881         560 :                 physical += offset;
    4882         560 :                 length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
    4883         560 :                 brelse(iloc.bh);
    4884         560 :                 iomap_type = IOMAP_INLINE;
    4885        2822 :         } else if (EXT4_I(inode)->i_file_acl) { /* external block */
    4886         221 :                 physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits;
    4887         221 :                 length = inode->i_sb->s_blocksize;
    4888         221 :                 iomap_type = IOMAP_MAPPED;
    4889             :         } else {
    4890             :                 /* no in-inode or external block for xattr, so return -ENOENT */
    4891        2601 :                 error = -ENOENT;
    4892        2601 :                 goto out;
    4893             :         }
    4894             : 
    4895         781 :         iomap->addr = physical;
    4896         781 :         iomap->offset = 0;
    4897         781 :         iomap->length = length;
    4898         781 :         iomap->type = iomap_type;
    4899         781 :         iomap->flags = 0;
    4900             : out:
    4901             :         return error;
    4902             : }
    4903             : 
    4904        3382 : static int ext4_iomap_xattr_begin(struct inode *inode, loff_t offset,
    4905             :                                   loff_t length, unsigned flags,
    4906             :                                   struct iomap *iomap, struct iomap *srcmap)
    4907             : {
    4908        3382 :         int error;
    4909             : 
    4910        3382 :         error = ext4_iomap_xattr_fiemap(inode, iomap);
    4911        3382 :         if (error == 0 && (offset >= iomap->length))
    4912         777 :                 error = -ENOENT;
    4913        3382 :         return error;
    4914             : }
    4915             : 
    4916             : static const struct iomap_ops ext4_iomap_xattr_ops = {
    4917             :         .iomap_begin            = ext4_iomap_xattr_begin,
    4918             : };
    4919             : 
    4920       43708 : static int ext4_fiemap_check_ranges(struct inode *inode, u64 start, u64 *len)
    4921             : {
    4922       43708 :         u64 maxbytes;
    4923             : 
    4924       43708 :         if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
    4925       43691 :                 maxbytes = inode->i_sb->s_maxbytes;
    4926             :         else
    4927          17 :                 maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
    4928             : 
    4929       43708 :         if (*len == 0)
    4930             :                 return -EINVAL;
    4931       43708 :         if (start > maxbytes)
    4932             :                 return -EFBIG;
    4933             : 
    4934             :         /*
    4935             :          * Shrink request scope to what the fs can actually handle.
    4936             :          */
    4937       43708 :         if (*len > maxbytes || (maxbytes - *len) < start)
    4938       28282 :                 *len = maxbytes - start;
    4939             :         return 0;
    4940             : }
    4941             : 
    4942       43709 : int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
    4943             :                 u64 start, u64 len)
    4944             : {
    4945       43709 :         int error = 0;
    4946             : 
    4947       43709 :         if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
    4948           0 :                 error = ext4_ext_precache(inode);
    4949           0 :                 if (error)
    4950             :                         return error;
    4951           0 :                 fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
    4952             :         }
    4953             : 
    4954             :         /*
    4955             :          * For bitmap files the maximum size limit could be smaller than
    4956             :          * s_maxbytes, so check len here manually instead of just relying on the
    4957             :          * generic check.
    4958             :          */
    4959       43709 :         error = ext4_fiemap_check_ranges(inode, start, &len);
    4960       43709 :         if (error)
    4961             :                 return error;
    4962             : 
    4963       43709 :         if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
    4964        6738 :                 fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR;
    4965        6738 :                 return iomap_fiemap(inode, fieinfo, start, len,
    4966             :                                     &ext4_iomap_xattr_ops);
    4967             :         }
    4968             : 
    4969       36971 :         return iomap_fiemap(inode, fieinfo, start, len, &ext4_iomap_report_ops);
    4970             : }
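
ext4_fiemap() is invoked for the FIEMAP ioctl once the requested range has been clamped by ext4_fiemap_check_ranges(); setting FIEMAP_FLAG_XATTR instead routes the request through the xattr iomap path above. A minimal sketch of the userspace side, assuming the file path is a placeholder and that 32 extent slots are enough for the demonstration:

#include <fcntl.h>
#include <linux/fiemap.h>
#include <linux/fs.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/tmp/fiemap-demo", O_RDONLY);
        unsigned int nr = 32;   /* assume 32 extents is enough for the demo */
        struct fiemap *fm;

        if (fd < 0) {
                perror("open");
                return 1;
        }
        fm = calloc(1, sizeof(*fm) + nr * sizeof(struct fiemap_extent));
        if (!fm)
                return 1;

        fm->fm_start = 0;
        fm->fm_length = FIEMAP_MAX_OFFSET;      /* "whole file" */
        fm->fm_flags = FIEMAP_FLAG_SYNC;        /* flush dirty data first */
        fm->fm_extent_count = nr;

        if (ioctl(fd, FS_IOC_FIEMAP, fm)) {
                perror("FS_IOC_FIEMAP");
                return 1;
        }
        for (unsigned int i = 0; i < fm->fm_mapped_extents; i++)
                printf("logical %llu physical %llu length %llu flags 0x%x\n",
                       (unsigned long long)fm->fm_extents[i].fe_logical,
                       (unsigned long long)fm->fm_extents[i].fe_physical,
                       (unsigned long long)fm->fm_extents[i].fe_length,
                       fm->fm_extents[i].fe_flags);

        free(fm);
        close(fd);
        return 0;
}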
    4971             : 
    4972           0 : int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo,
    4973             :                       __u64 start, __u64 len)
    4974             : {
    4975           0 :         ext4_lblk_t start_blk, len_blks;
    4976           0 :         __u64 last_blk;
    4977           0 :         int error = 0;
    4978             : 
    4979           0 :         if (ext4_has_inline_data(inode)) {
    4980           0 :                 int has_inline;
    4981             : 
    4982           0 :                 down_read(&EXT4_I(inode)->xattr_sem);
    4983           0 :                 has_inline = ext4_has_inline_data(inode);
    4984           0 :                 up_read(&EXT4_I(inode)->xattr_sem);
    4985           0 :                 if (has_inline)
    4986             :                         return 0;
    4987             :         }
    4988             : 
    4989           0 :         if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
    4990           0 :                 error = ext4_ext_precache(inode);
    4991           0 :                 if (error)
    4992             :                         return error;
    4993           0 :                 fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
    4994             :         }
    4995             : 
    4996           0 :         error = fiemap_prep(inode, fieinfo, start, &len, 0);
    4997           0 :         if (error)
    4998             :                 return error;
    4999             : 
    5000           0 :         error = ext4_fiemap_check_ranges(inode, start, &len);
    5001           0 :         if (error)
    5002             :                 return error;
    5003             : 
    5004           0 :         start_blk = start >> inode->i_sb->s_blocksize_bits;
    5005           0 :         last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
    5006           0 :         if (last_blk >= EXT_MAX_BLOCKS)
    5007             :                 last_blk = EXT_MAX_BLOCKS-1;
    5008           0 :         len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
    5009             : 
    5010             :         /*
    5011             :          * Walk the extent tree gathering extent information
    5012             :          * and pushing extents back to the user.
    5013             :          */
    5014           0 :         return ext4_fill_es_cache_info(inode, start_blk, len_blks, fieinfo);
    5015             : }
    5016             : 
    5017             : /*
    5018             :  * ext4_ext_shift_path_extents:
    5019             :  * Shift the extents of a path structure lying between path[depth].p_ext
    5020             :  * and EXT_LAST_EXTENT(path[depth].p_hdr), by @shift blocks. @SHIFT tells
     5021             :  * whether it is a right-shift or a left-shift operation.
    5022             :  */
    5023             : static int
    5024      349133 : ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
    5025             :                             struct inode *inode, handle_t *handle,
    5026             :                             enum SHIFT_DIRECTION SHIFT)
    5027             : {
    5028      349133 :         int depth, err = 0;
    5029      349133 :         struct ext4_extent *ex_start, *ex_last;
    5030      349133 :         bool update = false;
    5031      349133 :         int credits, restart_credits;
    5032      349133 :         depth = path->p_depth;
    5033             : 
    5034      360308 :         while (depth >= 0) {
    5035      349139 :                 if (depth == path->p_depth) {
    5036      349133 :                         ex_start = path[depth].p_ext;
    5037      349133 :                         if (!ex_start)
    5038             :                                 return -EFSCORRUPTED;
    5039             : 
    5040      349133 :                         ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
    5041             :                         /* leaf + sb + inode */
    5042      349133 :                         credits = 3;
    5043      349133 :                         if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) {
    5044       58888 :                                 update = true;
    5045             :                                 /* extent tree + sb + inode */
    5046       58888 :                                 credits = depth + 2;
    5047             :                         }
    5048             : 
    5049      349133 :                         restart_credits = ext4_writepage_trans_blocks(inode);
    5050      349135 :                         err = ext4_datasem_ensure_credits(handle, inode, credits,
    5051             :                                         restart_credits, 0);
    5052      349131 :                         if (err) {
    5053           0 :                                 if (err > 0)
    5054           0 :                                         err = -EAGAIN;
    5055           0 :                                 goto out;
    5056             :                         }
    5057             : 
    5058      349131 :                         err = ext4_ext_get_access(handle, inode, path + depth);
    5059      349136 :                         if (err)
    5060           0 :                                 goto out;
    5061             : 
    5062    10524426 :                         while (ex_start <= ex_last) {
    5063    10175291 :                                 if (SHIFT == SHIFT_LEFT) {
    5064     9423682 :                                         le32_add_cpu(&ex_start->ee_block,
    5065             :                                                 -shift);
    5066             :                                         /* Try to merge to the left. */
    5067     9423682 :                                         if ((ex_start >
    5068     9423682 :                                             EXT_FIRST_EXTENT(path[depth].p_hdr))
    5069     9373729 :                                             &&
    5070     9373732 :                                             ext4_ext_try_to_merge_right(inode,
    5071             :                                             path, ex_start - 1))
    5072          76 :                                                 ex_last--;
    5073             :                                         else
    5074     9423603 :                                                 ex_start++;
    5075             :                                 } else {
    5076      751609 :                                         le32_add_cpu(&ex_last->ee_block, shift);
    5077      751609 :                                         ext4_ext_try_to_merge_right(inode, path,
    5078             :                                                 ex_last);
    5079      751611 :                                         ex_last--;
    5080             :                                 }
    5081             :                         }
    5082      349135 :                         err = ext4_ext_dirty(handle, inode, path + depth);
    5083      349134 :                         if (err)
    5084           0 :                                 goto out;
    5085             : 
    5086      349134 :                         if (--depth < 0 || !update)
    5087             :                                 break;
    5088             :                 }
    5089             : 
    5090             :                 /* Update index too */
    5091       51577 :                 err = ext4_ext_get_access(handle, inode, path + depth);
    5092       51577 :                 if (err)
    5093           0 :                         goto out;
    5094             : 
    5095       51577 :                 if (SHIFT == SHIFT_LEFT)
    5096       46218 :                         le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
    5097             :                 else
    5098        5359 :                         le32_add_cpu(&path[depth].p_idx->ei_block, shift);
    5099       51577 :                 err = ext4_ext_dirty(handle, inode, path + depth);
    5100       51577 :                 if (err)
    5101           0 :                         goto out;
    5102             : 
    5103             :                 /* we are done if current index is not a starting index */
     5104             :                 /* We are done if the current index is not a starting index. */
    5105             :                         break;
    5106             : 
    5107       11175 :                 depth--;
    5108             :         }
    5109             : 
    5110      349134 : out:
    5111             :         return err;
    5112             : }
    5113             : 
    5114             : /*
    5115             :  * ext4_ext_shift_extents:
     5116             :  * All the extents which lie in the range from @start to the last allocated
     5117             :  * block for the @inode are shifted either towards the left or the right
     5118             :  * (depending on @SHIFT) by @shift blocks.
     5119             :  * On success, 0 is returned; an error otherwise.
    5120             :  */
    5121             : static int
    5122      339558 : ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
    5123             :                        ext4_lblk_t start, ext4_lblk_t shift,
    5124             :                        enum SHIFT_DIRECTION SHIFT)
    5125             : {
    5126      339558 :         struct ext4_ext_path *path;
    5127      339558 :         int ret = 0, depth;
    5128      339558 :         struct ext4_extent *extent;
    5129      339558 :         ext4_lblk_t stop, *iterator, ex_start, ex_end;
    5130      339558 :         ext4_lblk_t tmp = EXT_MAX_BLOCKS;
    5131             : 
    5132             :         /* Let path point to the last extent */
    5133      339558 :         path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
    5134             :                                 EXT4_EX_NOCACHE);
    5135      339558 :         if (IS_ERR(path))
    5136           0 :                 return PTR_ERR(path);
    5137             : 
    5138      339558 :         depth = path->p_depth;
    5139      339558 :         extent = path[depth].p_ext;
    5140      339558 :         if (!extent)
    5141       10065 :                 goto out;
    5142             : 
    5143      329493 :         stop = le32_to_cpu(extent->ee_block);
    5144             : 
    5145             :        /*
    5146             :         * For left shifts, make sure the hole on the left is big enough to
    5147             :         * accommodate the shift.  For right shifts, make sure the last extent
    5148             :         * won't be shifted beyond EXT_MAX_BLOCKS.
    5149             :         */
    5150      329493 :         if (SHIFT == SHIFT_LEFT) {
    5151      188390 :                 path = ext4_find_extent(inode, start - 1, &path,
    5152             :                                         EXT4_EX_NOCACHE);
    5153      188391 :                 if (IS_ERR(path))
    5154           0 :                         return PTR_ERR(path);
    5155      188391 :                 depth = path->p_depth;
    5156      188391 :                 extent =  path[depth].p_ext;
    5157      188391 :                 if (extent) {
    5158      188391 :                         ex_start = le32_to_cpu(extent->ee_block);
    5159      188391 :                         ex_end = le32_to_cpu(extent->ee_block) +
    5160      188391 :                                 ext4_ext_get_actual_len(extent);
    5161             :                 } else {
    5162             :                         ex_start = 0;
    5163             :                         ex_end = 0;
    5164             :                 }
    5165             : 
    5166      188391 :                 if ((start == ex_start && shift > ex_start) ||
    5167      188391 :                     (shift > start - ex_end)) {
    5168           0 :                         ret = -EINVAL;
    5169           0 :                         goto out;
    5170             :                 }
    5171             :         } else {
    5172      141103 :                 if (shift > EXT_MAX_BLOCKS -
    5173      141103 :                     (stop + ext4_ext_get_actual_len(extent))) {
    5174           1 :                         ret = -EINVAL;
    5175           1 :                         goto out;
    5176             :                 }
    5177             :         }
    5178             : 
    5179             :         /*
     5180             :          * In the case of a left shift, the iterator points to start and is
     5181             :          * increased until we reach stop. In the case of a right shift, it
     5182             :          * points to stop and is decreased until we reach start.
    5183             :          */
    5184      141102 : again:
    5185      329493 :         ret = 0;
    5186      329493 :         if (SHIFT == SHIFT_LEFT)
    5187             :                 iterator = &start;
    5188             :         else
    5189      141102 :                 iterator = &stop;
    5190             : 
    5191      329493 :         if (tmp != EXT_MAX_BLOCKS)
    5192           0 :                 *iterator = tmp;
    5193             : 
    5194             :         /*
     5195             :          * It's safe to start updating extents. Start and stop are unsigned, so
     5196             :          * in the case of a right shift, if an extent with block 0 is reached,
     5197             :          * the iterator becomes NULL to indicate the end of the loop.
    5198             :          */
    5199      678626 :         while (iterator && start <= stop) {
    5200      349135 :                 path = ext4_find_extent(inode, *iterator, &path,
    5201             :                                         EXT4_EX_NOCACHE);
    5202      349136 :                 if (IS_ERR(path))
    5203           0 :                         return PTR_ERR(path);
    5204      349136 :                 depth = path->p_depth;
    5205      349136 :                 extent = path[depth].p_ext;
    5206      349136 :                 if (!extent) {
    5207           0 :                         EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
    5208             :                                          (unsigned long) *iterator);
    5209           0 :                         return -EFSCORRUPTED;
    5210             :                 }
    5211      349136 :                 if (SHIFT == SHIFT_LEFT && *iterator >
    5212      215830 :                     le32_to_cpu(extent->ee_block)) {
    5213             :                         /* Hole, move to the next extent */
    5214       63857 :                         if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
    5215       63857 :                                 path[depth].p_ext++;
    5216             :                         } else {
    5217           0 :                                 *iterator = ext4_ext_next_allocated_block(path);
    5218           0 :                                 continue;
    5219             :                         }
    5220             :                 }
    5221             : 
    5222      349136 :                 tmp = *iterator;
    5223      349136 :                 if (SHIFT == SHIFT_LEFT) {
    5224      215830 :                         extent = EXT_LAST_EXTENT(path[depth].p_hdr);
    5225      215830 :                         *iterator = le32_to_cpu(extent->ee_block) +
    5226      215830 :                                         ext4_ext_get_actual_len(extent);
    5227             :                 } else {
    5228      133306 :                         extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
    5229      133306 :                         if (le32_to_cpu(extent->ee_block) > start)
    5230         159 :                                 *iterator = le32_to_cpu(extent->ee_block) - 1;
    5231      133147 :                         else if (le32_to_cpu(extent->ee_block) == start)
    5232             :                                 iterator = NULL;
    5233             :                         else {
    5234      124367 :                                 extent = EXT_LAST_EXTENT(path[depth].p_hdr);
    5235      774737 :                                 while (le32_to_cpu(extent->ee_block) >= start)
    5236      650370 :                                         extent--;
    5237             : 
    5238      124367 :                                 if (extent == EXT_LAST_EXTENT(path[depth].p_hdr))
    5239             :                                         break;
    5240             : 
    5241      124367 :                                 extent++;
    5242      124367 :                                 iterator = NULL;
    5243             :                         }
    5244      133306 :                         path[depth].p_ext = extent;
    5245             :                 }
    5246      349136 :                 ret = ext4_ext_shift_path_extents(path, shift, inode,
    5247             :                                 handle, SHIFT);
    5248             :                 /* iterator can be NULL which means we should break */
    5249      349133 :                 if (ret == -EAGAIN)
    5250           0 :                         goto again;
    5251      349133 :                 if (ret)
    5252             :                         break;
    5253             :         }
    5254      329491 : out:
    5255      339557 :         ext4_free_ext_path(path);
    5256      339559 :         return ret;
    5257             : }
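
Both shift helpers above only rewrite the logical start blocks (ee_block) of extents and of the index entries above them; no file data moves. A simplified in-memory illustration of a left shift with opportunistic merging of extents that become contiguous, using a toy structure rather than the on-disk ext4_extent:

#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for an extent: logical start block and length in blocks. */
struct toy_extent {
        uint32_t block;
        uint32_t len;
};

/*
 * Shift every extent whose start block is >= 'start' left by 'shift' blocks,
 * merging an extent into its left neighbour when the two become contiguous.
 * Returns the number of extents remaining after merging.
 */
static int shift_left(struct toy_extent *ex, int nr,
                      uint32_t start, uint32_t shift)
{
        int i, out = 0;

        for (i = 0; i < nr; i++) {
                struct toy_extent cur = ex[i];

                if (cur.block >= start)
                        cur.block -= shift;
                if (out > 0 &&
                    ex[out - 1].block + ex[out - 1].len == cur.block)
                        ex[out - 1].len += cur.len;     /* merge to the left */
                else
                        ex[out++] = cur;
        }
        return out;
}

int main(void)
{
        /* Hypothetical layout: blocks 10-13 mapped, 14-17 a hole, 18-23 mapped. */
        struct toy_extent ex[] = { { 10, 4 }, { 18, 6 } };
        int nr = shift_left(ex, 2, 18, 4);      /* collapse the 4-block hole */

        for (int i = 0; i < nr; i++)
                printf("extent %d: block %u len %u\n", i, ex[i].block, ex[i].len);
        return 0;
}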
    5258             : 
    5259             : /*
    5260             :  * ext4_collapse_range:
     5261             :  * This implements fallocate's collapse-range functionality for ext4.
     5262             :  * Returns 0 on success, non-zero on error.
    5263             :  */
    5264      202827 : static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
    5265             : {
    5266      202827 :         struct inode *inode = file_inode(file);
    5267      202827 :         struct super_block *sb = inode->i_sb;
    5268      202827 :         struct address_space *mapping = inode->i_mapping;
    5269      202827 :         ext4_lblk_t punch_start, punch_stop;
    5270      202827 :         handle_t *handle;
    5271      202827 :         unsigned int credits;
    5272      202827 :         loff_t new_size, ioffset;
    5273      202827 :         int ret;
    5274             : 
    5275             :         /*
    5276             :          * We need to test this early because xfstests assumes that a
    5277             :          * collapse range of (0, 1) will return EOPNOTSUPP if the file
    5278             :          * system does not support collapse range.
    5279             :          */
    5280      202827 :         if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
    5281             :                 return -EOPNOTSUPP;
    5282             : 
    5283             :         /* Collapse range works only on fs cluster size aligned regions. */
    5284      202820 :         if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
    5285             :                 return -EINVAL;
    5286             : 
    5287      199614 :         trace_ext4_collapse_range(inode, offset, len);
    5288             : 
    5289      199613 :         punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
    5290      199613 :         punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
    5291             : 
    5292      199613 :         inode_lock(inode);
    5293             :         /*
     5294             :          * There is no need to allow a collapse range that overlaps EOF, since
     5295             :          * that would effectively be a truncate operation.
    5296             :          */
    5297      199613 :         if (offset + len >= inode->i_size) {
    5298        2159 :                 ret = -EINVAL;
    5299        2159 :                 goto out_mutex;
    5300             :         }
    5301             : 
    5302             :         /* Currently just for extent based files */
    5303      197454 :         if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
    5304           0 :                 ret = -EOPNOTSUPP;
    5305           0 :                 goto out_mutex;
    5306             :         }
    5307             : 
    5308             :         /* Wait for existing dio to complete */
    5309      197454 :         inode_dio_wait(inode);
    5310             : 
    5311      197453 :         ret = file_modified(file);
    5312      197455 :         if (ret)
    5313           0 :                 goto out_mutex;
    5314             : 
    5315             :         /*
    5316             :          * Prevent page faults from reinstantiating pages we have released from
    5317             :          * page cache.
    5318             :          */
    5319      197455 :         filemap_invalidate_lock(mapping);
    5320             : 
    5321      197455 :         ret = ext4_break_layouts(inode);
    5322      197455 :         if (ret)
    5323           0 :                 goto out_mmap;
    5324             : 
    5325             :         /*
     5326             :          * Round the offset down to a page-size boundary; this matters when
     5327             :          * the page size is larger than the block size.
    5328             :          */
    5329      197455 :         ioffset = round_down(offset, PAGE_SIZE);
    5330             :         /*
     5331             :          * Write out the tail of the last page before the removed range since
     5332             :          * it will get removed from the page cache below.
    5333             :          */
    5334      197455 :         ret = filemap_write_and_wait_range(mapping, ioffset, offset);
    5335      197453 :         if (ret)
    5336           0 :                 goto out_mmap;
    5337             :         /*
     5338             :          * Write out the data that will be shifted, to preserve it when
     5339             :          * discarding the page cache below. Pages are kept from becoming dirty
    5340             :          * by i_rwsem and invalidate_lock.
    5341             :          */
    5342      197453 :         ret = filemap_write_and_wait_range(mapping, offset + len,
    5343             :                                            LLONG_MAX);
    5344      197454 :         if (ret)
    5345           0 :                 goto out_mmap;
    5346      197454 :         truncate_pagecache(inode, ioffset);
    5347             : 
    5348      197455 :         credits = ext4_writepage_trans_blocks(inode);
    5349      197455 :         handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
    5350      197455 :         if (IS_ERR(handle)) {
    5351           0 :                 ret = PTR_ERR(handle);
    5352           0 :                 goto out_mmap;
    5353             :         }
    5354      197455 :         ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
    5355             : 
    5356      197455 :         down_write(&EXT4_I(inode)->i_data_sem);
    5357      197455 :         ext4_discard_preallocations(inode, 0);
    5358      197455 :         ext4_es_remove_extent(inode, punch_start, EXT_MAX_BLOCKS - punch_start);
    5359             : 
    5360      197455 :         ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
    5361      197455 :         if (ret) {
    5362           0 :                 up_write(&EXT4_I(inode)->i_data_sem);
    5363           0 :                 goto out_stop;
    5364             :         }
    5365      197455 :         ext4_discard_preallocations(inode, 0);
    5366             : 
    5367      197455 :         ret = ext4_ext_shift_extents(inode, handle, punch_stop,
    5368             :                                      punch_stop - punch_start, SHIFT_LEFT);
    5369      197454 :         if (ret) {
    5370           0 :                 up_write(&EXT4_I(inode)->i_data_sem);
    5371           0 :                 goto out_stop;
    5372             :         }
    5373             : 
    5374      197454 :         new_size = inode->i_size - len;
    5375      197454 :         i_size_write(inode, new_size);
    5376      197454 :         EXT4_I(inode)->i_disksize = new_size;
    5377             : 
    5378      197454 :         up_write(&EXT4_I(inode)->i_data_sem);
    5379      197454 :         if (IS_SYNC(inode))
    5380           0 :                 ext4_handle_sync(handle);
    5381      197454 :         inode->i_mtime = inode->i_ctime = current_time(inode);
    5382      197453 :         ret = ext4_mark_inode_dirty(handle, inode);
    5383      197455 :         ext4_update_inode_fsync_trans(handle, inode, 1);
    5384             : 
    5385      197455 : out_stop:
    5386      197455 :         ext4_journal_stop(handle);
    5387      197455 : out_mmap:
    5388      197455 :         filemap_invalidate_unlock(mapping);
    5389      199614 : out_mutex:
    5390      199614 :         inode_unlock(inode);
    5391      199614 :         return ret;
    5392             : }
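
Seen from userspace, a successful collapse removes the byte range from the middle of the file and shifts the tail down, shrinking i_size by len. A small sketch of that behaviour, assuming 4 KiB blocks and a placeholder path; offset and len must be cluster-aligned and must not reach EOF:

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
        char block[4096];
        int fd = open("/tmp/collapse-demo", O_RDWR | O_CREAT | O_TRUNC, 0644);
        struct stat st;

        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* Write three 4 KiB blocks filled with 'A', 'B', 'C'. */
        for (int i = 0; i < 3; i++) {
                memset(block, 'A' + i, sizeof(block));
                pwrite(fd, block, sizeof(block), (off_t)i * 4096);
        }

        /* Collapse the middle block: 'C' shifts down, size drops to 8 KiB. */
        if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 4096, 4096)) {
                perror("collapse");
                return 1;
        }
        fstat(fd, &st);
        pread(fd, block, 1, 4096);
        printf("size=%lld byte@4096=%c\n", (long long)st.st_size, block[0]);

        close(fd);
        return 0;
}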
    5393             : 
    5394             : /*
    5395             :  * ext4_insert_range:
    5396             :  * This function implements the FALLOC_FL_INSERT_RANGE flag of fallocate.
     5397             :  * The data blocks starting from @offset up to EOF are shifted by @len
     5398             :  * towards the right to create a hole in the @inode. The inode size is
     5399             :  * increased by @len bytes.
     5400             :  * Returns 0 on success, an error otherwise.
    5401             :  */
    5402      146872 : static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
    5403             : {
    5404      146872 :         struct inode *inode = file_inode(file);
    5405      146872 :         struct super_block *sb = inode->i_sb;
    5406      146872 :         struct address_space *mapping = inode->i_mapping;
    5407      146872 :         handle_t *handle;
    5408      146872 :         struct ext4_ext_path *path;
    5409      146872 :         struct ext4_extent *extent;
    5410      146872 :         ext4_lblk_t offset_lblk, len_lblk, ee_start_lblk = 0;
    5411      146872 :         unsigned int credits, ee_len;
    5412      146872 :         int ret = 0, depth, split_flag = 0;
    5413      146872 :         loff_t ioffset;
    5414             : 
    5415             :         /*
    5416             :          * We need to test this early because xfstests assumes that an
    5417             :          * insert range of (0, 1) will return EOPNOTSUPP if the file
    5418             :          * system does not support insert range.
    5419             :          */
    5420      146872 :         if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
    5421             :                 return -EOPNOTSUPP;
    5422             : 
    5423             :         /* Insert range works only on fs cluster size aligned regions. */
    5424      146865 :         if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
    5425             :                 return -EINVAL;
    5426             : 
    5427      143679 :         trace_ext4_insert_range(inode, offset, len);
    5428             : 
    5429      143679 :         offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb);
    5430      143679 :         len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb);
    5431             : 
    5432      143679 :         inode_lock(inode);
    5433             :         /* Currently just for extent based files */
    5434      143679 :         if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
    5435           0 :                 ret = -EOPNOTSUPP;
    5436           0 :                 goto out_mutex;
    5437             :         }
    5438             : 
    5439             :         /* Check whether the maximum file size would be exceeded */
    5440      143679 :         if (len > inode->i_sb->s_maxbytes - inode->i_size) {
    5441           1 :                 ret = -EFBIG;
    5442           1 :                 goto out_mutex;
    5443             :         }
    5444             : 
    5445             :         /* Offset must be less than i_size */
    5446      143678 :         if (offset >= inode->i_size) {
    5447        1574 :                 ret = -EINVAL;
    5448        1574 :                 goto out_mutex;
    5449             :         }
    5450             : 
    5451             :         /* Wait for existing dio to complete */
    5452      142104 :         inode_dio_wait(inode);
    5453             : 
    5454      142104 :         ret = file_modified(file);
    5455      142104 :         if (ret)
    5456           0 :                 goto out_mutex;
    5457             : 
    5458             :         /*
    5459             :          * Prevent page faults from reinstantiating pages we have released from
    5460             :          * page cache.
    5461             :          */
    5462      142104 :         filemap_invalidate_lock(mapping);
    5463             : 
    5464      142104 :         ret = ext4_break_layouts(inode);
    5465      142104 :         if (ret)
    5466           0 :                 goto out_mmap;
    5467             : 
    5468             :         /*
     5469             :          * Round the start offset down to a page-size boundary; this matters
     5470             :          * when the page size is larger than the block size.
    5471             :          */
    5472      142104 :         ioffset = round_down(offset, PAGE_SIZE);
    5473             :         /* Write out all dirty pages */
    5474      142104 :         ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
    5475             :                         LLONG_MAX);
    5476      142103 :         if (ret)
    5477           0 :                 goto out_mmap;
    5478      142103 :         truncate_pagecache(inode, ioffset);
    5479             : 
    5480      142104 :         credits = ext4_writepage_trans_blocks(inode);
    5481      142104 :         handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
    5482      142103 :         if (IS_ERR(handle)) {
    5483           0 :                 ret = PTR_ERR(handle);
    5484           0 :                 goto out_mmap;
    5485             :         }
    5486      142103 :         ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
    5487             : 
     5488             :         /* Expand the file to avoid data loss if there is an error while shifting */
    5489      142103 :         inode->i_size += len;
    5490      142103 :         EXT4_I(inode)->i_disksize += len;
    5491      142103 :         inode->i_mtime = inode->i_ctime = current_time(inode);
    5492      142102 :         ret = ext4_mark_inode_dirty(handle, inode);
    5493      142104 :         if (ret)
    5494           0 :                 goto out_stop;
    5495             : 
    5496      142104 :         down_write(&EXT4_I(inode)->i_data_sem);
    5497      142103 :         ext4_discard_preallocations(inode, 0);
    5498             : 
    5499      142104 :         path = ext4_find_extent(inode, offset_lblk, NULL, 0);
    5500      142103 :         if (IS_ERR(path)) {
    5501           0 :                 up_write(&EXT4_I(inode)->i_data_sem);
    5502           0 :                 goto out_stop;
    5503             :         }
    5504             : 
    5505      142103 :         depth = ext_depth(inode);
    5506      142103 :         extent = path[depth].p_ext;
    5507      142103 :         if (extent) {
    5508      141102 :                 ee_start_lblk = le32_to_cpu(extent->ee_block);
    5509      141102 :                 ee_len = ext4_ext_get_actual_len(extent);
    5510             : 
    5511             :                 /*
     5512             :                  * If offset_lblk is not the starting block of the extent,
     5513             :                  * split the extent at @offset_lblk.
    5514             :                  */
    5515      141102 :                 if ((offset_lblk > ee_start_lblk) &&
    5516      112238 :                                 (offset_lblk < (ee_start_lblk + ee_len))) {
    5517       47895 :                         if (ext4_ext_is_unwritten(extent))
    5518       17610 :                                 split_flag = EXT4_EXT_MARK_UNWRIT1 |
    5519             :                                         EXT4_EXT_MARK_UNWRIT2;
    5520       47895 :                         ret = ext4_split_extent_at(handle, inode, &path,
    5521             :                                         offset_lblk, split_flag,
    5522             :                                         EXT4_EX_NOCACHE |
    5523             :                                         EXT4_GET_BLOCKS_PRE_IO |
    5524             :                                         EXT4_GET_BLOCKS_METADATA_NOFAIL);
    5525             :                 }
    5526             : 
    5527      141102 :                 ext4_free_ext_path(path);
    5528      141103 :                 if (ret < 0) {
    5529           0 :                         up_write(&EXT4_I(inode)->i_data_sem);
    5530           0 :                         goto out_stop;
    5531             :                 }
    5532             :         } else {
    5533        1001 :                 ext4_free_ext_path(path);
    5534             :         }
    5535             : 
    5536      142104 :         ext4_es_remove_extent(inode, offset_lblk, EXT_MAX_BLOCKS - offset_lblk);
    5537             : 
    5538             :         /*
     5539             :          * If offset_lblk lies in a hole at the start of the file, use
     5540             :          * ee_start_lblk to shift extents.
    5541             :          */
    5542      142103 :         ret = ext4_ext_shift_extents(inode, handle,
    5543      142103 :                 max(ee_start_lblk, offset_lblk), len_lblk, SHIFT_RIGHT);
    5544             : 
    5545      142104 :         up_write(&EXT4_I(inode)->i_data_sem);
    5546      142102 :         if (IS_SYNC(inode))
    5547           0 :                 ext4_handle_sync(handle);
    5548      142102 :         if (ret >= 0)
    5549      142102 :                 ext4_update_inode_fsync_trans(handle, inode, 1);
    5550             : 
    5551           0 : out_stop:
    5552      142100 :         ext4_journal_stop(handle);
    5553      142103 : out_mmap:
    5554      142103 :         filemap_invalidate_unlock(mapping);
    5555      143676 : out_mutex:
    5556      143676 :         inode_unlock(inode);
    5557      143676 :         return ret;
    5558             : }
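
Insert range is the inverse of collapse: the tail starting at offset is shifted towards higher offsets by len and an unallocated hole appears in the gap, so i_size grows. A brief sketch under the same alignment assumptions as the collapse example, operating on a hypothetical pre-populated file:

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/tmp/insert-demo", O_RDWR);      /* pre-populated file */
        struct stat st;

        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* Shift everything from 4 KiB onwards up by one 4 KiB block. */
        if (fallocate(fd, FALLOC_FL_INSERT_RANGE, 4096, 4096)) {
                perror("insert");
                return 1;
        }
        fstat(fd, &st);
        printf("new size=%lld (grew by 4096), hole now at [4096, 8192)\n",
               (long long)st.st_size);
        close(fd);
        return 0;
}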
    5559             : 
    5560             : /**
    5561             :  * ext4_swap_extents() - Swap extents between two inodes
    5562             :  * @handle: handle for this transaction
    5563             :  * @inode1:     First inode
    5564             :  * @inode2:     Second inode
    5565             :  * @lblk1:      Start block for first inode
    5566             :  * @lblk2:      Start block for second inode
    5567             :  * @count:      Number of blocks to swap
    5568             :  * @unwritten: Mark second inode's extents as unwritten after swap
    5569             :  * @erp:        Pointer to save error value
    5570             :  *
     5571             :  * This helper routine does exactly what it promises: "swap extents". All other
     5572             :  * concerns such as page-cache locking consistency, bh mapping consistency or
     5573             :  * extent data copying must be performed by the caller.
    5574             :  * Locking:
    5575             :  *              i_rwsem is held for both inodes
    5576             :  *              i_data_sem is locked for write for both inodes
    5577             :  * Assumptions:
    5578             :  *              All pages from requested range are locked for both inodes
    5579             :  */
    5580             : int
    5581     1169656 : ext4_swap_extents(handle_t *handle, struct inode *inode1,
    5582             :                   struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
    5583             :                   ext4_lblk_t count, int unwritten, int *erp)
    5584             : {
    5585     1169656 :         struct ext4_ext_path *path1 = NULL;
    5586     1169656 :         struct ext4_ext_path *path2 = NULL;
    5587     1169656 :         int replaced_count = 0;
    5588             : 
    5589     1169656 :         BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
    5590     1169656 :         BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
    5591     1169656 :         BUG_ON(!inode_is_locked(inode1));
    5592     1169656 :         BUG_ON(!inode_is_locked(inode2));
    5593             : 
    5594     1169656 :         ext4_es_remove_extent(inode1, lblk1, count);
    5595     1169656 :         ext4_es_remove_extent(inode2, lblk2, count);
    5596             : 
    5597     3510466 :         while (count) {
    5598     2340810 :                 struct ext4_extent *ex1, *ex2, tmp_ex;
    5599     2340810 :                 ext4_lblk_t e1_blk, e2_blk;
    5600     2340810 :                 int e1_len, e2_len, len;
    5601     2340810 :                 int split = 0;
    5602             : 
    5603     2340810 :                 path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE);
    5604     2340810 :                 if (IS_ERR(path1)) {
    5605           0 :                         *erp = PTR_ERR(path1);
    5606           0 :                         path1 = NULL;
    5607           0 :                 finish:
    5608           0 :                         count = 0;
    5609           0 :                         goto repeat;
    5610             :                 }
    5611     2340810 :                 path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE);
    5612     2340810 :                 if (IS_ERR(path2)) {
    5613           0 :                         *erp = PTR_ERR(path2);
    5614           0 :                         path2 = NULL;
    5615           0 :                         goto finish;
    5616             :                 }
    5617     2340810 :                 ex1 = path1[path1->p_depth].p_ext;
    5618     2340810 :                 ex2 = path2[path2->p_depth].p_ext;
    5619             :                 /* Do we have something to swap ? */
    5620     2340810 :                 if (unlikely(!ex2 || !ex1))
    5621           0 :                         goto finish;
    5622             : 
    5623     2340810 :                 e1_blk = le32_to_cpu(ex1->ee_block);
    5624     2340810 :                 e2_blk = le32_to_cpu(ex2->ee_block);
    5625     2340810 :                 e1_len = ext4_ext_get_actual_len(ex1);
    5626     2340810 :                 e2_len = ext4_ext_get_actual_len(ex2);
    5627             : 
    5628             :                 /* Hole handling */
    5629     2340810 :                 if (!in_range(lblk1, e1_blk, e1_len) ||
    5630     2340406 :                     !in_range(lblk2, e2_blk, e2_len)) {
    5631         506 :                         ext4_lblk_t next1, next2;
    5632             : 
    5633             :                         /* if hole after extent, then go to next extent */
    5634         506 :                         next1 = ext4_ext_next_allocated_block(path1);
    5635         506 :                         next2 = ext4_ext_next_allocated_block(path2);
    5636             :                         /* If hole before extent, then shift to that extent */
    5637         506 :                         if (e1_blk > lblk1)
    5638           0 :                                 next1 = e1_blk;
    5639         506 :                         if (e2_blk > lblk2)
    5640         404 :                                 next2 = e2_blk;
    5641             :                         /* Do we have something to swap? */
    5642         506 :                         if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS)
    5643           0 :                                 goto finish;
    5644             :                         /* Move to the rightmost boundary */
    5645         506 :                         len = next1 - lblk1;
    5646         506 :                         if (len < next2 - lblk2)
    5647             :                                 len = next2 - lblk2;
    5648         506 :                         if (len > count)
    5649         506 :                                 len = count;
    5650         506 :                         lblk1 += len;
    5651         506 :                         lblk2 += len;
    5652         506 :                         count -= len;
    5653         506 :                         goto repeat;
    5654             :                 }
    5655             : 
    5656             :                 /* Prepare left boundary */
    5657     2340304 :                 if (e1_blk < lblk1) {
    5658        3229 :                         split = 1;
    5659        3229 :                         *erp = ext4_force_split_extent_at(handle, inode1,
    5660             :                                                 &path1, lblk1, 0);
    5661        3229 :                         if (unlikely(*erp))
    5662           0 :                                 goto finish;
    5663             :                 }
    5664     2340304 :                 if (e2_blk < lblk2) {
    5665        1537 :                         split = 1;
    5666        1537 :                         *erp = ext4_force_split_extent_at(handle, inode2,
    5667             :                                                 &path2,  lblk2, 0);
    5668        1537 :                         if (unlikely(*erp))
    5669           0 :                                 goto finish;
    5670             :                 }
    5671             :                 /* ext4_split_extent_at() may result in leaf extent split,
    5672             :                  * path must be revalidated. */
    5673     2338767 :                 if (split)
    5674        3789 :                         goto repeat;
    5675             : 
    5676             :                 /* Prepare right boundary */
    5677     2336515 :                 len = count;
    5678     2336515 :                 if (len > e1_blk + e1_len - lblk1)
    5679             :                         len = e1_blk + e1_len - lblk1;
    5680     2336515 :                 if (len > e2_blk + e2_len - lblk2)
    5681           0 :                         len = e2_blk + e2_len - lblk2;
    5682             : 
    5683     2336515 :                 if (len != e1_len) {
    5684     1158044 :                         split = 1;
    5685     1158044 :                         *erp = ext4_force_split_extent_at(handle, inode1,
    5686             :                                                 &path1, lblk1 + len, 0);
    5687     1158044 :                         if (unlikely(*erp))
    5688           0 :                                 goto finish;
    5689             :                 }
    5690     2336515 :                 if (len != e2_len) {
    5691     1163684 :                         split = 1;
    5692     1163684 :                         *erp = ext4_force_split_extent_at(handle, inode2,
    5693             :                                                 &path2, lblk2 + len, 0);
    5694     1163684 :                         if (*erp)
    5695           0 :                                 goto finish;
    5696             :                 }
    5697             :                 /* ext4_split_extent_at() may result in leaf extent split,
    5698             :                  * path must be revalidated. */
    5699     1172831 :                 if (split)
    5700     1167365 :                         goto repeat;
    5701             : 
    5702     1169150 :                 BUG_ON(e2_len != e1_len);
    5703     1169150 :                 *erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth);
    5704     1169150 :                 if (unlikely(*erp))
    5705           0 :                         goto finish;
    5706     1169150 :                 *erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth);
    5707     1169150 :                 if (unlikely(*erp))
    5708           0 :                         goto finish;
    5709             : 
    5710             :                 /* Both extents are fully inside boundaries. Swap it now */
    5711     1169150 :                 tmp_ex = *ex1;
    5712     1169150 :                 ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2));
    5713     1169150 :                 ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex));
    5714     1169150 :                 ex1->ee_len = cpu_to_le16(e2_len);
    5715     1169150 :                 ex2->ee_len = cpu_to_le16(e1_len);
    5716     1169150 :                 if (unwritten)
    5717     1169150 :                         ext4_ext_mark_unwritten(ex2);
    5718     1169150 :                 if (ext4_ext_is_unwritten(&tmp_ex))
    5719     1125553 :                         ext4_ext_mark_unwritten(ex1);
    5720             : 
    5721     1169150 :                 ext4_ext_try_to_merge(handle, inode2, path2, ex2);
    5722     1169150 :                 ext4_ext_try_to_merge(handle, inode1, path1, ex1);
    5723     1169150 :                 *erp = ext4_ext_dirty(handle, inode2, path2 +
    5724             :                                       path2->p_depth);
    5725     1169150 :                 if (unlikely(*erp))
    5726           0 :                         goto finish;
    5727     1169150 :                 *erp = ext4_ext_dirty(handle, inode1, path1 +
    5728             :                                       path1->p_depth);
    5729             :                 /*
    5730             :                  * Looks scary, huh? The second inode already points to the
    5731             :                  * new blocks, and it was successfully dirtied. But luckily an
    5732             :                  * error can only happen here due to a journal error, so the
    5733             :                  * full transaction will be aborted anyway.
    5734             :                  */
    5735     1169150 :                 if (unlikely(*erp))
    5736           0 :                         goto finish;
    5737     1169150 :                 lblk1 += len;
    5738     1169150 :                 lblk2 += len;
    5739     1169150 :                 replaced_count += len;
    5740     1169150 :                 count -= len;
    5741             : 
    5742     2340810 :         repeat:
    5743     2340810 :                 ext4_free_ext_path(path1);
    5744     2340810 :                 ext4_free_ext_path(path2);
    5745     2340810 :                 path1 = path2 = NULL;
    5746             :         }
    5747     1169656 :         return replaced_count;
    5748             : }
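
The locking contract documented above is easy to get wrong, so here is a hedged
caller sketch (not part of extents.c). It assumes the double i_data_sem helpers
from fs/ext4/move_extent.c and a hypothetical swap_extent_range() wrapper;
i_rwsem and the page cache are handled by the caller, as the comment requires.

        static int swap_extent_range(handle_t *handle, struct inode *orig,
                                     struct inode *donor, ext4_lblk_t lblk,
                                     ext4_lblk_t count)
        {
                int err = 0;

                /* Caller already holds i_rwsem on both inodes and has locked
                 * and flushed the affected page-cache pages. */
                ext4_double_down_write_data_sem(orig, donor);
                ext4_swap_extents(handle, orig, donor, lblk, lblk, count,
                                  /* unwritten */ 1, &err);
                ext4_double_up_write_data_sem(orig, donor);

                return err;
        }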
    5749             : 
    5750             : /*
    5751             :  * ext4_clu_mapped - determine whether any block in a logical cluster has
    5752             :  *                   been mapped to a physical cluster
    5753             :  *
    5754             :  * @inode - file containing the logical cluster
    5755             :  * @lclu - logical cluster of interest
    5756             :  *
    5757             :  * Returns 1 if any block in the logical cluster is mapped, signifying
    5758             :  * that a physical cluster has been allocated for it.  Otherwise,
    5759             :  * returns 0.  Can also return negative error codes.  Derived from
    5760             :  * ext4_ext_map_blocks().
    5761             :  */
    5762        4096 : int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu)
    5763             : {
    5764        4096 :         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    5765        4096 :         struct ext4_ext_path *path;
    5766        4096 :         int depth, mapped = 0, err = 0;
    5767        4096 :         struct ext4_extent *extent;
    5768        4096 :         ext4_lblk_t first_lblk, first_lclu, last_lclu;
    5769             : 
    5770             :         /*
    5771             :          * if data can be stored inline, the logical cluster isn't
    5772             :          * mapped - no physical clusters have been allocated, and the
    5773             :          * file has no extents
    5774             :          */
    5775        4096 :         if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) ||
    5776             :             ext4_has_inline_data(inode))
    5777             :                 return 0;
    5778             : 
    5779             :         /* search for the extent closest to the first block in the cluster */
    5780        4096 :         path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0);
    5781        4096 :         if (IS_ERR(path)) {
    5782           0 :                 err = PTR_ERR(path);
    5783           0 :                 path = NULL;
    5784           0 :                 goto out;
    5785             :         }
    5786             : 
    5787        4096 :         depth = ext_depth(inode);
    5788             : 
    5789             :         /*
    5790             :          * A consistent leaf must not be empty.  This situation is possible,
    5791             :          * though, _during_ tree modification, and it's why an assert can't
    5792             :          * be put in ext4_find_extent().
    5793             :          */
    5794        4096 :         if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
    5795           0 :                 EXT4_ERROR_INODE(inode,
    5796             :                     "bad extent address - lblock: %lu, depth: %d, pblock: %lld",
    5797             :                                  (unsigned long) EXT4_C2B(sbi, lclu),
    5798             :                                  depth, path[depth].p_block);
    5799           0 :                 err = -EFSCORRUPTED;
    5800           0 :                 goto out;
    5801             :         }
    5802             : 
    5803        4096 :         extent = path[depth].p_ext;
    5804             : 
    5805             :         /* can't be mapped if the extent tree is empty */
    5806        4096 :         if (extent == NULL)
    5807        3867 :                 goto out;
    5808             : 
    5809         229 :         first_lblk = le32_to_cpu(extent->ee_block);
    5810         229 :         first_lclu = EXT4_B2C(sbi, first_lblk);
    5811             : 
    5812             :         /*
    5813             :          * Three possible outcomes at this point - found extent spanning
    5814             :          * the target cluster, to the left of the target cluster, or to the
    5815             :          * right of the target cluster.  The first two cases are handled here.
    5816             :          * The last case indicates the target cluster is not mapped.
    5817             :          */
    5818         229 :         if (lclu >= first_lclu) {
    5819         458 :                 last_lclu = EXT4_B2C(sbi, first_lblk +
    5820             :                                      ext4_ext_get_actual_len(extent) - 1);
    5821         229 :                 if (lclu <= last_lclu) {
    5822             :                         mapped = 1;
    5823             :                 } else {
    5824         229 :                         first_lblk = ext4_ext_next_allocated_block(path);
    5825         229 :                         first_lclu = EXT4_B2C(sbi, first_lblk);
    5826         229 :                         if (lclu == first_lclu)
    5827           0 :                                 mapped = 1;
    5828             :                 }
    5829             :         }
    5830             : 
    5831         229 : out:
    5832        4096 :         ext4_free_ext_path(path);
    5833             : 
    5834        4096 :         return err ? err : mapped;
    5835             : }
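
A hedged usage sketch for the helper above, assuming a bigalloc filesystem and
using a hypothetical wrapper name; EXT4_B2C() converts a block number into its
cluster number.

        static int lblk_cluster_allocated(struct inode *inode, ext4_lblk_t lblk)
        {
                struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

                /* 1 if any block of the cluster is mapped, 0 if none,
                 * or a negative error code. */
                return ext4_clu_mapped(inode, EXT4_B2C(sbi, lblk));
        }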
    5836             : 
    5837             : /*
    5838             :  * Updates the physical block address and unwritten status of the extent
    5839             :  * starting at logical block 'start' with length 'len'. If such an extent
    5840             :  * doesn't exist, this function splits the extent tree appropriately to
    5841             :  * create one.  This function is called in the fast commit replay path.
    5842             :  * Returns 0 on success and an error on failure.
    5843             :  */
    5844           0 : int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start,
    5845             :                               int len, int unwritten, ext4_fsblk_t pblk)
    5846             : {
    5847           0 :         struct ext4_ext_path *path = NULL, *ppath;
    5848           0 :         struct ext4_extent *ex;
    5849           0 :         int ret;
    5850             : 
    5851           0 :         path = ext4_find_extent(inode, start, NULL, 0);
    5852           0 :         if (IS_ERR(path))
    5853           0 :                 return PTR_ERR(path);
    5854           0 :         ex = path[path->p_depth].p_ext;
    5855           0 :         if (!ex) {
    5856           0 :                 ret = -EFSCORRUPTED;
    5857           0 :                 goto out;
    5858             :         }
    5859             : 
    5860           0 :         if (le32_to_cpu(ex->ee_block) != start ||
    5861             :                 ext4_ext_get_actual_len(ex) != len) {
    5862             :                 /* We need to split this extent to match our extent first */
    5863           0 :                 ppath = path;
    5864           0 :                 down_write(&EXT4_I(inode)->i_data_sem);
    5865           0 :                 ret = ext4_force_split_extent_at(NULL, inode, &ppath, start, 1);
    5866           0 :                 up_write(&EXT4_I(inode)->i_data_sem);
    5867           0 :                 if (ret)
    5868           0 :                         goto out;
    5869           0 :                 kfree(path);
    5870           0 :                 path = ext4_find_extent(inode, start, NULL, 0);
    5871           0 :                 if (IS_ERR(path))
    5872             :                         return -1;
    5873           0 :                 ppath = path;
    5874           0 :                 ex = path[path->p_depth].p_ext;
    5875           0 :                 WARN_ON(le32_to_cpu(ex->ee_block) != start);
    5876           0 :                 if (ext4_ext_get_actual_len(ex) != len) {
    5877           0 :                         down_write(&EXT4_I(inode)->i_data_sem);
    5878           0 :                         ret = ext4_force_split_extent_at(NULL, inode, &ppath,
    5879             :                                                          start + len, 1);
    5880           0 :                         up_write(&EXT4_I(inode)->i_data_sem);
    5881           0 :                         if (ret)
    5882           0 :                                 goto out;
    5883           0 :                         kfree(path);
    5884           0 :                         path = ext4_find_extent(inode, start, NULL, 0);
    5885           0 :                         if (IS_ERR(path))
    5886             :                                 return -EINVAL;
    5887           0 :                         ex = path[path->p_depth].p_ext;
    5888             :                 }
    5889             :         }
    5890           0 :         if (unwritten)
    5891           0 :                 ext4_ext_mark_unwritten(ex);
    5892             :         else
    5893           0 :                 ext4_ext_mark_initialized(ex);
    5894           0 :         ext4_ext_store_pblock(ex, pblk);
    5895           0 :         down_write(&EXT4_I(inode)->i_data_sem);
    5896           0 :         ret = ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
    5897           0 :         up_write(&EXT4_I(inode)->i_data_sem);
    5898           0 : out:
    5899           0 :         ext4_free_ext_path(path);
    5900           0 :         ext4_mark_inode_dirty(NULL, inode);
    5901           0 :         return ret;
    5902             : }
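
A hedged worked example of the splitting behaviour described above: if the tree
holds a single extent covering logical blocks [0, 100) and replay calls
ext4_ext_replay_update_ex(inode, 10, 20, 0, P), the extent is split first at
block 10 and then at block 30, and the resulting middle extent [10, 30) gets
its physical start set to P and is marked initialized.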
    5903             : 
    5904             : /* Try to shrink the extent tree */
    5905           0 : void ext4_ext_replay_shrink_inode(struct inode *inode, ext4_lblk_t end)
    5906             : {
    5907           0 :         struct ext4_ext_path *path = NULL;
    5908           0 :         struct ext4_extent *ex;
    5909           0 :         ext4_lblk_t old_cur, cur = 0;
    5910             : 
    5911           0 :         while (cur < end) {
    5912           0 :                 path = ext4_find_extent(inode, cur, NULL, 0);
    5913           0 :                 if (IS_ERR(path))
    5914             :                         return;
    5915           0 :                 ex = path[path->p_depth].p_ext;
    5916           0 :                 if (!ex) {
    5917           0 :                         ext4_free_ext_path(path);
    5918           0 :                         ext4_mark_inode_dirty(NULL, inode);
    5919           0 :                         return;
    5920             :                 }
    5921           0 :                 old_cur = cur;
    5922           0 :                 cur = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
    5923           0 :                 if (cur <= old_cur)
    5924           0 :                         cur = old_cur + 1;
    5925           0 :                 ext4_ext_try_to_merge(NULL, inode, path, ex);
    5926           0 :                 down_write(&EXT4_I(inode)->i_data_sem);
    5927           0 :                 ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
    5928           0 :                 up_write(&EXT4_I(inode)->i_data_sem);
    5929           0 :                 ext4_mark_inode_dirty(NULL, inode);
    5930           0 :                 ext4_free_ext_path(path);
    5931             :         }
    5932             : }
    5933             : 
    5934             : /* Check if *cur is a hole and if it is, skip it */
    5935           0 : static int skip_hole(struct inode *inode, ext4_lblk_t *cur)
    5936             : {
    5937           0 :         int ret;
    5938           0 :         struct ext4_map_blocks map;
    5939             : 
    5940           0 :         map.m_lblk = *cur;
    5941           0 :         map.m_len = ((inode->i_size) >> inode->i_sb->s_blocksize_bits) - *cur;
    5942             : 
    5943           0 :         ret = ext4_map_blocks(NULL, inode, &map, 0);
    5944           0 :         if (ret < 0)
    5945             :                 return ret;
    5946           0 :         if (ret != 0)
    5947             :                 return 0;
    5948           0 :         *cur = *cur + map.m_len;
    5949           0 :         return 0;
    5950             : }
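
A hedged worked example of the hole convention skip_hole() relies on: if blocks
0-9 of the file are unmapped and block 10 is the first mapped block, then
ext4_map_blocks() called with *cur == 0 returns 0 and sets map.m_len to 10, so
skip_hole() advances *cur to 10, the first mapped block.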
    5951             : 
    5952             : /* Count number of blocks used by this inode and update i_blocks */
    5953           0 : int ext4_ext_replay_set_iblocks(struct inode *inode)
    5954             : {
    5955           0 :         struct ext4_ext_path *path = NULL, *path2 = NULL;
    5956           0 :         struct ext4_extent *ex;
    5957           0 :         ext4_lblk_t cur = 0, end;
    5958           0 :         int numblks = 0, i, ret = 0;
    5959           0 :         ext4_fsblk_t cmp1, cmp2;
    5960           0 :         struct ext4_map_blocks map;
    5961             : 
    5962             :         /* Determine the size of the file first */
    5963           0 :         path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
    5964             :                                         EXT4_EX_NOCACHE);
    5965           0 :         if (IS_ERR(path))
    5966           0 :                 return PTR_ERR(path);
    5967           0 :         ex = path[path->p_depth].p_ext;
    5968           0 :         if (!ex) {
    5969           0 :                 ext4_free_ext_path(path);
    5970           0 :                 goto out;
    5971             :         }
    5972           0 :         end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
    5973           0 :         ext4_free_ext_path(path);
    5974             : 
    5975             :         /* Count the number of data blocks */
    5976           0 :         cur = 0;
    5977           0 :         while (cur < end) {
    5978           0 :                 map.m_lblk = cur;
    5979           0 :                 map.m_len = end - cur;
    5980           0 :                 ret = ext4_map_blocks(NULL, inode, &map, 0);
    5981           0 :                 if (ret < 0)
    5982             :                         break;
    5983           0 :                 if (ret > 0)
    5984           0 :                         numblks += ret;
    5985           0 :                 cur = cur + map.m_len;
    5986             :         }
    5987             : 
    5988             :         /*
    5989             :          * Count the number of extent tree blocks. We do it by looking up
    5990             :          * two successive extents and determining the difference between
    5991             :          * their paths. When the paths of two successive extents differ,
    5992             :          * we compare the blocks in the path at each level and increment
    5993             :          * iblocks by the total number of differences found.
    5994             :          */
    5995           0 :         cur = 0;
    5996           0 :         ret = skip_hole(inode, &cur);
    5997           0 :         if (ret < 0)
    5998           0 :                 goto out;
    5999           0 :         path = ext4_find_extent(inode, cur, NULL, 0);
    6000           0 :         if (IS_ERR(path))
    6001           0 :                 goto out;
    6002           0 :         numblks += path->p_depth;
    6003           0 :         ext4_free_ext_path(path);
    6004           0 :         while (cur < end) {
    6005           0 :                 path = ext4_find_extent(inode, cur, NULL, 0);
    6006           0 :                 if (IS_ERR(path))
    6007             :                         break;
    6008           0 :                 ex = path[path->p_depth].p_ext;
    6009           0 :                 if (!ex) {
    6010           0 :                         ext4_free_ext_path(path);
    6011           0 :                         return 0;
    6012             :                 }
    6013           0 :                 cur = max(cur + 1, le32_to_cpu(ex->ee_block) +
    6014             :                                         ext4_ext_get_actual_len(ex));
    6015           0 :                 ret = skip_hole(inode, &cur);
    6016           0 :                 if (ret < 0) {
    6017           0 :                         ext4_free_ext_path(path);
    6018             :                         break;
    6019             :                 }
    6020           0 :                 path2 = ext4_find_extent(inode, cur, NULL, 0);
    6021           0 :                 if (IS_ERR(path2)) {
    6022           0 :                         ext4_free_ext_path(path);
    6023             :                         break;
    6024             :                 }
    6025           0 :                 for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) {
    6026           0 :                         cmp1 = cmp2 = 0;
    6027           0 :                         if (i <= path->p_depth)
    6028           0 :                                 cmp1 = path[i].p_bh ?
    6029           0 :                                         path[i].p_bh->b_blocknr : 0;
    6030           0 :                         if (i <= path2->p_depth)
    6031           0 :                                 cmp2 = path2[i].p_bh ?
    6032           0 :                                         path2[i].p_bh->b_blocknr : 0;
    6033           0 :                         if (cmp1 != cmp2 && cmp2 != 0)
    6034           0 :                                 numblks++;
    6035             :                 }
    6036           0 :                 ext4_free_ext_path(path);
    6037           0 :                 ext4_free_ext_path(path2);
    6038             :         }
    6039             : 
    6040           0 : out:
    6041           0 :         inode->i_blocks = numblks << (inode->i_sb->s_blocksize_bits - 9);
    6042           0 :         ext4_mark_inode_dirty(NULL, inode);
    6043           0 :         return 0;
    6044             : }
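
Two hedged worked examples for the accounting above. Extent tree blocks: if two
successive extents have paths {root, index I1, leaf L1} and {root, index I1,
leaf L2}, only the leaf block differs, so numblks grows by one for that pair.
Final conversion: i_blocks is kept in 512-byte sectors, so with a 4 KiB block
size (s_blocksize_bits == 12) each counted block contributes
1 << (12 - 9) = 8 sectors; numblks == 100 therefore yields i_blocks == 800.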
    6045             : 
    6046           0 : int ext4_ext_clear_bb(struct inode *inode)
    6047             : {
    6048           0 :         struct ext4_ext_path *path = NULL;
    6049           0 :         struct ext4_extent *ex;
    6050           0 :         ext4_lblk_t cur = 0, end;
    6051           0 :         int j, ret = 0;
    6052           0 :         struct ext4_map_blocks map;
    6053             : 
    6054           0 :         if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
    6055             :                 return 0;
    6056             : 
    6057             :         /* Determine the size of the file first */
    6058           0 :         path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
    6059             :                                         EXT4_EX_NOCACHE);
    6060           0 :         if (IS_ERR(path))
    6061           0 :                 return PTR_ERR(path);
    6062           0 :         ex = path[path->p_depth].p_ext;
    6063           0 :         if (!ex) {
    6064           0 :                 ext4_free_ext_path(path);
    6065           0 :                 return 0;
    6066             :         }
    6067           0 :         end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
    6068           0 :         ext4_free_ext_path(path);
    6069             : 
    6070           0 :         cur = 0;
    6071           0 :         while (cur < end) {
    6072           0 :                 map.m_lblk = cur;
    6073           0 :                 map.m_len = end - cur;
    6074           0 :                 ret = ext4_map_blocks(NULL, inode, &map, 0);
    6075           0 :                 if (ret < 0)
    6076             :                         break;
    6077           0 :                 if (ret > 0) {
    6078           0 :                         path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
    6079           0 :                         if (!IS_ERR_OR_NULL(path)) {
    6080           0 :                                 for (j = 0; j < path->p_depth; j++) {
    6081             : 
    6082           0 :                                         ext4_mb_mark_bb(inode->i_sb,
    6083           0 :                                                         path[j].p_block, 1, 0);
    6084           0 :                                         ext4_fc_record_regions(inode->i_sb, inode->i_ino,
    6085             :                                                         0, path[j].p_block, 1, 1);
    6086             :                                 }
    6087           0 :                                 ext4_free_ext_path(path);
    6088             :                         }
    6089           0 :                         ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
    6090           0 :                         ext4_fc_record_regions(inode->i_sb, inode->i_ino,
    6091           0 :                                         map.m_lblk, map.m_pblk, map.m_len, 1);
    6092             :                 }
    6093           0 :                 cur = cur + map.m_len;
    6094             :         }
    6095             : 
    6096             :         return 0;
    6097             : }

Generated by: LCOV version 1.14