Line data Source code
1 : // SPDX-License-Identifier: LGPL-2.1
2 : /*
3 : * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd.
4 : * Written by Takashi Sato <t-sato@yk.jp.nec.com>
5 : * Akira Fujita <a-fujita@rs.jp.nec.com>
6 : */
7 :
8 : #include <linux/fs.h>
9 : #include <linux/quotaops.h>
10 : #include <linux/slab.h>
11 : #include <linux/sched/mm.h>
12 : #include "ext4_jbd2.h"
13 : #include "ext4.h"
14 : #include "ext4_extents.h"
15 :
16 : /**
17 : * get_ext_path() - Find an extent path for designated logical block number.
18 : * @inode: inode to be searched
19 : * @lblock: logical block number to find an extent path
20 : * @ppath: pointer to an extent path pointer (for output)
21 : *
22 : * ext4_find_extent wrapper. Return 0 on success, or a negative error value
23 : * on failure.
24 : */
25 : static inline int
26 0 : get_ext_path(struct inode *inode, ext4_lblk_t lblock,
27 : struct ext4_ext_path **ppath)
28 : {
29 0 : struct ext4_ext_path *path;
30 :
31 0 : path = ext4_find_extent(inode, lblock, ppath, EXT4_EX_NOCACHE);
32 0 : if (IS_ERR(path))
33 0 : return PTR_ERR(path);
34 0 : if (path[ext_depth(inode)].p_ext == NULL) {
35 0 : ext4_free_ext_path(path);
36 0 : *ppath = NULL;
37 0 : return -ENODATA;
38 : }
39 0 : *ppath = path;
40 0 : return 0;
41 : }
42 :
43 : /**
44 : * ext4_double_down_write_data_sem() - write lock two inodes's i_data_sem
45 : * @first: inode to be locked
46 : * @second: inode to be locked
47 : *
48 : * Acquire write lock of i_data_sem of the two inodes
49 : */
50 : void
51 0 : ext4_double_down_write_data_sem(struct inode *first, struct inode *second)
52 : {
53 0 : if (first < second) {
54 0 : down_write(&EXT4_I(first)->i_data_sem);
55 0 : down_write_nested(&EXT4_I(second)->i_data_sem, I_DATA_SEM_OTHER);
56 : } else {
57 0 : down_write(&EXT4_I(second)->i_data_sem);
58 0 : down_write_nested(&EXT4_I(first)->i_data_sem, I_DATA_SEM_OTHER);
59 :
60 : }
61 0 : }
62 :
63 : /**
64 : * ext4_double_up_write_data_sem - Release two inodes' write lock of i_data_sem
65 : *
66 : * @orig_inode: original inode structure to be released its lock first
67 : * @donor_inode: donor inode structure to be released its lock second
68 : * Release write lock of i_data_sem of two inodes (orig and donor).
69 : */
70 : void
71 0 : ext4_double_up_write_data_sem(struct inode *orig_inode,
72 : struct inode *donor_inode)
73 : {
74 0 : up_write(&EXT4_I(orig_inode)->i_data_sem);
75 0 : up_write(&EXT4_I(donor_inode)->i_data_sem);
76 0 : }
77 :
78 : /**
79 : * mext_check_coverage - Check that all extents in range has the same type
80 : *
81 : * @inode: inode in question
82 : * @from: block offset of inode
83 : * @count: block count to be checked
84 : * @unwritten: extents expected to be unwritten
85 : * @err: pointer to save error value
86 : *
87 : * Return 1 if all extents in range has expected type, and zero otherwise.
88 : */
89 : static int
90 0 : mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
91 : int unwritten, int *err)
92 : {
93 0 : struct ext4_ext_path *path = NULL;
94 0 : struct ext4_extent *ext;
95 0 : int ret = 0;
96 0 : ext4_lblk_t last = from + count;
97 0 : while (from < last) {
98 0 : *err = get_ext_path(inode, from, &path);
99 0 : if (*err)
100 0 : goto out;
101 0 : ext = path[ext_depth(inode)].p_ext;
102 0 : if (unwritten != ext4_ext_is_unwritten(ext))
103 0 : goto out;
104 0 : from += ext4_ext_get_actual_len(ext);
105 : }
106 : ret = 1;
107 0 : out:
108 0 : ext4_free_ext_path(path);
109 0 : return ret;
110 : }
111 :
112 : /**
113 : * mext_folio_double_lock - Grab and lock folio on both @inode1 and @inode2
114 : *
115 : * @inode1: the inode structure
116 : * @inode2: the inode structure
117 : * @index1: folio index
118 : * @index2: folio index
119 : * @folio: result folio vector
120 : *
121 : * Grab two locked folio for inode's by inode order
122 : */
123 : static int
124 0 : mext_folio_double_lock(struct inode *inode1, struct inode *inode2,
125 : pgoff_t index1, pgoff_t index2, struct folio *folio[2])
126 : {
127 0 : struct address_space *mapping[2];
128 0 : unsigned int flags;
129 :
130 0 : BUG_ON(!inode1 || !inode2);
131 0 : if (inode1 < inode2) {
132 0 : mapping[0] = inode1->i_mapping;
133 0 : mapping[1] = inode2->i_mapping;
134 : } else {
135 0 : swap(index1, index2);
136 0 : mapping[0] = inode2->i_mapping;
137 0 : mapping[1] = inode1->i_mapping;
138 : }
139 :
140 0 : flags = memalloc_nofs_save();
141 0 : folio[0] = __filemap_get_folio(mapping[0], index1, FGP_WRITEBEGIN,
142 : mapping_gfp_mask(mapping[0]));
143 0 : if (IS_ERR(folio[0])) {
144 0 : memalloc_nofs_restore(flags);
145 0 : return PTR_ERR(folio[0]);
146 : }
147 :
148 0 : folio[1] = __filemap_get_folio(mapping[1], index2, FGP_WRITEBEGIN,
149 : mapping_gfp_mask(mapping[1]));
150 0 : memalloc_nofs_restore(flags);
151 0 : if (IS_ERR(folio[1])) {
152 0 : folio_unlock(folio[0]);
153 0 : folio_put(folio[0]);
154 0 : return PTR_ERR(folio[1]);
155 : }
156 : /*
157 : * __filemap_get_folio() may not wait on folio's writeback if
158 : * BDI not demand that. But it is reasonable to be very conservative
159 : * here and explicitly wait on folio's writeback
160 : */
161 0 : folio_wait_writeback(folio[0]);
162 0 : folio_wait_writeback(folio[1]);
163 0 : if (inode1 > inode2)
164 0 : swap(folio[0], folio[1]);
165 :
166 : return 0;
167 : }
168 :
169 : /* Force page buffers uptodate w/o dropping page's lock */
170 : static int
171 0 : mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
172 : {
173 0 : struct inode *inode = folio->mapping->host;
174 0 : sector_t block;
175 0 : struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
176 0 : unsigned int blocksize, block_start, block_end;
177 0 : int i, err, nr = 0, partial = 0;
178 0 : BUG_ON(!folio_test_locked(folio));
179 0 : BUG_ON(folio_test_writeback(folio));
180 :
181 0 : if (folio_test_uptodate(folio))
182 : return 0;
183 :
184 0 : blocksize = i_blocksize(inode);
185 0 : head = folio_buffers(folio);
186 0 : if (!head) {
187 0 : create_empty_buffers(&folio->page, blocksize, 0);
188 0 : head = folio_buffers(folio);
189 : }
190 :
191 0 : block = (sector_t)folio->index << (PAGE_SHIFT - inode->i_blkbits);
192 0 : for (bh = head, block_start = 0; bh != head || !block_start;
193 0 : block++, block_start = block_end, bh = bh->b_this_page) {
194 0 : block_end = block_start + blocksize;
195 0 : if (block_end <= from || block_start >= to) {
196 0 : if (!buffer_uptodate(bh))
197 0 : partial = 1;
198 0 : continue;
199 : }
200 0 : if (buffer_uptodate(bh))
201 0 : continue;
202 0 : if (!buffer_mapped(bh)) {
203 0 : err = ext4_get_block(inode, block, bh, 0);
204 0 : if (err) {
205 0 : folio_set_error(folio);
206 0 : return err;
207 : }
208 0 : if (!buffer_mapped(bh)) {
209 0 : folio_zero_range(folio, block_start, blocksize);
210 0 : set_buffer_uptodate(bh);
211 0 : continue;
212 : }
213 : }
214 0 : BUG_ON(nr >= MAX_BUF_PER_PAGE);
215 0 : arr[nr++] = bh;
216 : }
217 : /* No io required */
218 0 : if (!nr)
219 0 : goto out;
220 :
221 0 : for (i = 0; i < nr; i++) {
222 0 : bh = arr[i];
223 0 : if (!bh_uptodate_or_lock(bh)) {
224 0 : err = ext4_read_bh(bh, 0, NULL);
225 0 : if (err)
226 0 : return err;
227 : }
228 : }
229 0 : out:
230 0 : if (!partial)
231 0 : folio_mark_uptodate(folio);
232 : return 0;
233 : }
234 :
235 : /**
236 : * move_extent_per_page - Move extent data per page
237 : *
238 : * @o_filp: file structure of original file
239 : * @donor_inode: donor inode
240 : * @orig_page_offset: page index on original file
241 : * @donor_page_offset: page index on donor file
242 : * @data_offset_in_page: block index where data swapping starts
243 : * @block_len_in_page: the number of blocks to be swapped
244 : * @unwritten: orig extent is unwritten or not
245 : * @err: pointer to save return value
246 : *
247 : * Save the data in original inode blocks and replace original inode extents
248 : * with donor inode extents by calling ext4_swap_extents().
249 : * Finally, write out the saved data in new original inode blocks. Return
250 : * replaced block count.
251 : */
252 : static int
253 0 : move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
254 : pgoff_t orig_page_offset, pgoff_t donor_page_offset,
255 : int data_offset_in_page,
256 : int block_len_in_page, int unwritten, int *err)
257 : {
258 0 : struct inode *orig_inode = file_inode(o_filp);
259 0 : struct folio *folio[2] = {NULL, NULL};
260 0 : handle_t *handle;
261 0 : ext4_lblk_t orig_blk_offset, donor_blk_offset;
262 0 : unsigned long blocksize = orig_inode->i_sb->s_blocksize;
263 0 : unsigned int tmp_data_size, data_size, replaced_size;
264 0 : int i, err2, jblocks, retries = 0;
265 0 : int replaced_count = 0;
266 0 : int from = data_offset_in_page << orig_inode->i_blkbits;
267 0 : int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
268 0 : struct super_block *sb = orig_inode->i_sb;
269 0 : struct buffer_head *bh = NULL;
270 :
271 : /*
272 : * It needs twice the amount of ordinary journal buffers because
273 : * inode and donor_inode may change each different metadata blocks.
274 : */
275 : again:
276 0 : *err = 0;
277 0 : jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
278 0 : handle = ext4_journal_start(orig_inode, EXT4_HT_MOVE_EXTENTS, jblocks);
279 0 : if (IS_ERR(handle)) {
280 0 : *err = PTR_ERR(handle);
281 0 : return 0;
282 : }
283 :
284 0 : orig_blk_offset = orig_page_offset * blocks_per_page +
285 : data_offset_in_page;
286 :
287 0 : donor_blk_offset = donor_page_offset * blocks_per_page +
288 : data_offset_in_page;
289 :
290 : /* Calculate data_size */
291 0 : if ((orig_blk_offset + block_len_in_page - 1) ==
292 0 : ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
293 : /* Replace the last block */
294 0 : tmp_data_size = orig_inode->i_size & (blocksize - 1);
295 : /*
296 : * If data_size equal zero, it shows data_size is multiples of
297 : * blocksize. So we set appropriate value.
298 : */
299 0 : if (tmp_data_size == 0)
300 0 : tmp_data_size = blocksize;
301 :
302 0 : data_size = tmp_data_size +
303 0 : ((block_len_in_page - 1) << orig_inode->i_blkbits);
304 : } else
305 0 : data_size = block_len_in_page << orig_inode->i_blkbits;
306 :
307 0 : replaced_size = data_size;
308 :
309 0 : *err = mext_folio_double_lock(orig_inode, donor_inode, orig_page_offset,
310 : donor_page_offset, folio);
311 0 : if (unlikely(*err < 0))
312 0 : goto stop_journal;
313 : /*
314 : * If orig extent was unwritten it can become initialized
315 : * at any time after i_data_sem was dropped, in order to
316 : * serialize with delalloc we have recheck extent while we
317 : * hold page's lock, if it is still the case data copy is not
318 : * necessary, just swap data blocks between orig and donor.
319 : */
320 :
321 0 : VM_BUG_ON_FOLIO(folio_test_large(folio[0]), folio[0]);
322 0 : VM_BUG_ON_FOLIO(folio_test_large(folio[1]), folio[1]);
323 0 : VM_BUG_ON_FOLIO(folio_nr_pages(folio[0]) != folio_nr_pages(folio[1]), folio[1]);
324 :
325 0 : if (unwritten) {
326 0 : ext4_double_down_write_data_sem(orig_inode, donor_inode);
327 : /* If any of extents in range became initialized we have to
328 : * fallback to data copying */
329 0 : unwritten = mext_check_coverage(orig_inode, orig_blk_offset,
330 : block_len_in_page, 1, err);
331 0 : if (*err)
332 0 : goto drop_data_sem;
333 :
334 0 : unwritten &= mext_check_coverage(donor_inode, donor_blk_offset,
335 : block_len_in_page, 1, err);
336 0 : if (*err)
337 0 : goto drop_data_sem;
338 :
339 0 : if (!unwritten) {
340 0 : ext4_double_up_write_data_sem(orig_inode, donor_inode);
341 0 : goto data_copy;
342 : }
343 0 : if ((folio_has_private(folio[0]) &&
344 0 : !filemap_release_folio(folio[0], 0)) ||
345 0 : (folio_has_private(folio[1]) &&
346 0 : !filemap_release_folio(folio[1], 0))) {
347 0 : *err = -EBUSY;
348 0 : goto drop_data_sem;
349 : }
350 0 : replaced_count = ext4_swap_extents(handle, orig_inode,
351 : donor_inode, orig_blk_offset,
352 : donor_blk_offset,
353 : block_len_in_page, 1, err);
354 0 : drop_data_sem:
355 0 : ext4_double_up_write_data_sem(orig_inode, donor_inode);
356 0 : goto unlock_folios;
357 : }
358 0 : data_copy:
359 0 : *err = mext_page_mkuptodate(folio[0], from, from + replaced_size);
360 0 : if (*err)
361 0 : goto unlock_folios;
362 :
363 : /* At this point all buffers in range are uptodate, old mapping layout
364 : * is no longer required, try to drop it now. */
365 0 : if ((folio_has_private(folio[0]) &&
366 0 : !filemap_release_folio(folio[0], 0)) ||
367 0 : (folio_has_private(folio[1]) &&
368 0 : !filemap_release_folio(folio[1], 0))) {
369 0 : *err = -EBUSY;
370 0 : goto unlock_folios;
371 : }
372 0 : ext4_double_down_write_data_sem(orig_inode, donor_inode);
373 0 : replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode,
374 : orig_blk_offset, donor_blk_offset,
375 : block_len_in_page, 1, err);
376 0 : ext4_double_up_write_data_sem(orig_inode, donor_inode);
377 0 : if (*err) {
378 0 : if (replaced_count) {
379 0 : block_len_in_page = replaced_count;
380 0 : replaced_size =
381 0 : block_len_in_page << orig_inode->i_blkbits;
382 : } else
383 0 : goto unlock_folios;
384 : }
385 : /* Perform all necessary steps similar write_begin()/write_end()
386 : * but keeping in mind that i_size will not change */
387 0 : if (!folio_buffers(folio[0]))
388 0 : create_empty_buffers(&folio[0]->page, 1 << orig_inode->i_blkbits, 0);
389 0 : bh = folio_buffers(folio[0]);
390 0 : for (i = 0; i < data_offset_in_page; i++)
391 0 : bh = bh->b_this_page;
392 0 : for (i = 0; i < block_len_in_page; i++) {
393 0 : *err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0);
394 0 : if (*err < 0)
395 : break;
396 0 : bh = bh->b_this_page;
397 : }
398 0 : if (!*err)
399 0 : *err = block_commit_write(&folio[0]->page, from, from + replaced_size);
400 :
401 0 : if (unlikely(*err < 0))
402 0 : goto repair_branches;
403 :
404 : /* Even in case of data=writeback it is reasonable to pin
405 : * inode to transaction, to prevent unexpected data loss */
406 0 : *err = ext4_jbd2_inode_add_write(handle, orig_inode,
407 0 : (loff_t)orig_page_offset << PAGE_SHIFT, replaced_size);
408 :
409 0 : unlock_folios:
410 0 : folio_unlock(folio[0]);
411 0 : folio_put(folio[0]);
412 0 : folio_unlock(folio[1]);
413 0 : folio_put(folio[1]);
414 0 : stop_journal:
415 0 : ext4_journal_stop(handle);
416 0 : if (*err == -ENOSPC &&
417 0 : ext4_should_retry_alloc(sb, &retries))
418 0 : goto again;
419 : /* Buffer was busy because probably is pinned to journal transaction,
420 : * force transaction commit may help to free it. */
421 0 : if (*err == -EBUSY && retries++ < 4 && EXT4_SB(sb)->s_journal &&
422 0 : jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal))
423 0 : goto again;
424 : return replaced_count;
425 :
426 : repair_branches:
427 : /*
428 : * This should never ever happen!
429 : * Extents are swapped already, but we are not able to copy data.
430 : * Try to swap extents to it's original places
431 : */
432 0 : ext4_double_down_write_data_sem(orig_inode, donor_inode);
433 0 : replaced_count = ext4_swap_extents(handle, donor_inode, orig_inode,
434 : orig_blk_offset, donor_blk_offset,
435 : block_len_in_page, 0, &err2);
436 0 : ext4_double_up_write_data_sem(orig_inode, donor_inode);
437 0 : if (replaced_count != block_len_in_page) {
438 0 : ext4_error_inode_block(orig_inode, (sector_t)(orig_blk_offset),
439 : EIO, "Unable to copy data block,"
440 : " data will be lost.");
441 0 : *err = -EIO;
442 : }
443 0 : replaced_count = 0;
444 0 : goto unlock_folios;
445 : }
446 :
447 : /**
448 : * mext_check_arguments - Check whether move extent can be done
449 : *
450 : * @orig_inode: original inode
451 : * @donor_inode: donor inode
452 : * @orig_start: logical start offset in block for orig
453 : * @donor_start: logical start offset in block for donor
454 : * @len: the number of blocks to be moved
455 : *
456 : * Check the arguments of ext4_move_extents() whether the files can be
457 : * exchanged with each other.
458 : * Return 0 on success, or a negative error value on failure.
459 : */
460 : static int
461 0 : mext_check_arguments(struct inode *orig_inode,
462 : struct inode *donor_inode, __u64 orig_start,
463 : __u64 donor_start, __u64 *len)
464 : {
465 0 : __u64 orig_eof, donor_eof;
466 0 : unsigned int blkbits = orig_inode->i_blkbits;
467 0 : unsigned int blocksize = 1 << blkbits;
468 :
469 0 : orig_eof = (i_size_read(orig_inode) + blocksize - 1) >> blkbits;
470 0 : donor_eof = (i_size_read(donor_inode) + blocksize - 1) >> blkbits;
471 :
472 :
473 0 : if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
474 : ext4_debug("ext4 move extent: suid or sgid is set"
475 : " to donor file [ino:orig %lu, donor %lu]\n",
476 : orig_inode->i_ino, donor_inode->i_ino);
477 : return -EINVAL;
478 : }
479 :
480 0 : if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode))
481 : return -EPERM;
482 :
483 : /* Ext4 move extent does not support swap files */
484 0 : if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
485 : ext4_debug("ext4 move extent: The argument files should not be swap files [ino:orig %lu, donor %lu]\n",
486 : orig_inode->i_ino, donor_inode->i_ino);
487 : return -ETXTBSY;
488 : }
489 :
490 0 : if (ext4_is_quota_file(orig_inode) && ext4_is_quota_file(donor_inode)) {
491 : ext4_debug("ext4 move extent: The argument files should not be quota files [ino:orig %lu, donor %lu]\n",
492 : orig_inode->i_ino, donor_inode->i_ino);
493 : return -EOPNOTSUPP;
494 : }
495 :
496 : /* Ext4 move extent supports only extent based file */
497 0 : if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) {
498 : ext4_debug("ext4 move extent: orig file is not extents "
499 : "based file [ino:orig %lu]\n", orig_inode->i_ino);
500 : return -EOPNOTSUPP;
501 0 : } else if (!(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) {
502 : ext4_debug("ext4 move extent: donor file is not extents "
503 : "based file [ino:donor %lu]\n", donor_inode->i_ino);
504 : return -EOPNOTSUPP;
505 : }
506 :
507 0 : if ((!orig_inode->i_size) || (!donor_inode->i_size)) {
508 : ext4_debug("ext4 move extent: File size is 0 byte\n");
509 : return -EINVAL;
510 : }
511 :
512 : /* Start offset should be same */
513 0 : if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) !=
514 0 : (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) {
515 : ext4_debug("ext4 move extent: orig and donor's start "
516 : "offsets are not aligned [ino:orig %lu, donor %lu]\n",
517 : orig_inode->i_ino, donor_inode->i_ino);
518 : return -EINVAL;
519 : }
520 :
521 0 : if ((orig_start >= EXT_MAX_BLOCKS) ||
522 0 : (donor_start >= EXT_MAX_BLOCKS) ||
523 0 : (*len > EXT_MAX_BLOCKS) ||
524 0 : (donor_start + *len >= EXT_MAX_BLOCKS) ||
525 0 : (orig_start + *len >= EXT_MAX_BLOCKS)) {
526 : ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
527 : "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS,
528 : orig_inode->i_ino, donor_inode->i_ino);
529 : return -EINVAL;
530 : }
531 0 : if (orig_eof <= orig_start)
532 0 : *len = 0;
533 0 : else if (orig_eof < orig_start + *len - 1)
534 0 : *len = orig_eof - orig_start;
535 0 : if (donor_eof <= donor_start)
536 0 : *len = 0;
537 0 : else if (donor_eof < donor_start + *len - 1)
538 0 : *len = donor_eof - donor_start;
539 0 : if (!*len) {
540 0 : ext4_debug("ext4 move extent: len should not be 0 "
541 : "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
542 : donor_inode->i_ino);
543 0 : return -EINVAL;
544 : }
545 :
546 : return 0;
547 : }
548 :
549 : /**
550 : * ext4_move_extents - Exchange the specified range of a file
551 : *
552 : * @o_filp: file structure of the original file
553 : * @d_filp: file structure of the donor file
554 : * @orig_blk: start offset in block for orig
555 : * @donor_blk: start offset in block for donor
556 : * @len: the number of blocks to be moved
557 : * @moved_len: moved block length
558 : *
559 : * This function returns 0 and moved block length is set in moved_len
560 : * if succeed, otherwise returns error value.
561 : *
562 : */
563 : int
564 0 : ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
565 : __u64 donor_blk, __u64 len, __u64 *moved_len)
566 : {
567 0 : struct inode *orig_inode = file_inode(o_filp);
568 0 : struct inode *donor_inode = file_inode(d_filp);
569 0 : struct ext4_ext_path *path = NULL;
570 0 : int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
571 0 : ext4_lblk_t o_end, o_start = orig_blk;
572 0 : ext4_lblk_t d_start = donor_blk;
573 0 : int ret;
574 :
575 0 : if (orig_inode->i_sb != donor_inode->i_sb) {
576 : ext4_debug("ext4 move extent: The argument files "
577 : "should be in same FS [ino:orig %lu, donor %lu]\n",
578 : orig_inode->i_ino, donor_inode->i_ino);
579 : return -EINVAL;
580 : }
581 :
582 : /* orig and donor should be different inodes */
583 0 : if (orig_inode == donor_inode) {
584 : ext4_debug("ext4 move extent: The argument files should not "
585 : "be same inode [ino:orig %lu, donor %lu]\n",
586 : orig_inode->i_ino, donor_inode->i_ino);
587 : return -EINVAL;
588 : }
589 :
590 : /* Regular file check */
591 0 : if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
592 : ext4_debug("ext4 move extent: The argument files should be "
593 : "regular file [ino:orig %lu, donor %lu]\n",
594 : orig_inode->i_ino, donor_inode->i_ino);
595 : return -EINVAL;
596 : }
597 :
598 : /* TODO: it's not obvious how to swap blocks for inodes with full
599 : journaling enabled */
600 0 : if (ext4_should_journal_data(orig_inode) ||
601 : ext4_should_journal_data(donor_inode)) {
602 0 : ext4_msg(orig_inode->i_sb, KERN_ERR,
603 : "Online defrag not supported with data journaling");
604 0 : return -EOPNOTSUPP;
605 : }
606 :
607 0 : if (IS_ENCRYPTED(orig_inode) || IS_ENCRYPTED(donor_inode)) {
608 0 : ext4_msg(orig_inode->i_sb, KERN_ERR,
609 : "Online defrag not supported for encrypted files");
610 0 : return -EOPNOTSUPP;
611 : }
612 :
613 : /* Protect orig and donor inodes against a truncate */
614 0 : lock_two_nondirectories(orig_inode, donor_inode);
615 :
616 : /* Wait for all existing dio workers */
617 0 : inode_dio_wait(orig_inode);
618 0 : inode_dio_wait(donor_inode);
619 :
620 : /* Protect extent tree against block allocations via delalloc */
621 0 : ext4_double_down_write_data_sem(orig_inode, donor_inode);
622 : /* Check the filesystem environment whether move_extent can be done */
623 0 : ret = mext_check_arguments(orig_inode, donor_inode, orig_blk,
624 : donor_blk, &len);
625 0 : if (ret)
626 0 : goto out;
627 0 : o_end = o_start + len;
628 :
629 0 : while (o_start < o_end) {
630 0 : struct ext4_extent *ex;
631 0 : ext4_lblk_t cur_blk, next_blk;
632 0 : pgoff_t orig_page_index, donor_page_index;
633 0 : int offset_in_page;
634 0 : int unwritten, cur_len;
635 :
636 0 : ret = get_ext_path(orig_inode, o_start, &path);
637 0 : if (ret)
638 0 : goto out;
639 0 : ex = path[path->p_depth].p_ext;
640 0 : cur_blk = le32_to_cpu(ex->ee_block);
641 0 : cur_len = ext4_ext_get_actual_len(ex);
642 : /* Check hole before the start pos */
643 0 : if (cur_blk + cur_len - 1 < o_start) {
644 0 : next_blk = ext4_ext_next_allocated_block(path);
645 0 : if (next_blk == EXT_MAX_BLOCKS) {
646 0 : ret = -ENODATA;
647 0 : goto out;
648 : }
649 0 : d_start += next_blk - o_start;
650 0 : o_start = next_blk;
651 0 : continue;
652 : /* Check hole after the start pos */
653 0 : } else if (cur_blk > o_start) {
654 : /* Skip hole */
655 0 : d_start += cur_blk - o_start;
656 0 : o_start = cur_blk;
657 : /* Extent inside requested range ?*/
658 0 : if (cur_blk >= o_end)
659 0 : goto out;
660 : } else { /* in_range(o_start, o_blk, o_len) */
661 0 : cur_len += cur_blk - o_start;
662 : }
663 0 : unwritten = ext4_ext_is_unwritten(ex);
664 0 : if (o_end - o_start < cur_len)
665 0 : cur_len = o_end - o_start;
666 :
667 0 : orig_page_index = o_start >> (PAGE_SHIFT -
668 0 : orig_inode->i_blkbits);
669 0 : donor_page_index = d_start >> (PAGE_SHIFT -
670 0 : donor_inode->i_blkbits);
671 0 : offset_in_page = o_start % blocks_per_page;
672 0 : if (cur_len > blocks_per_page - offset_in_page)
673 : cur_len = blocks_per_page - offset_in_page;
674 : /*
675 : * Up semaphore to avoid following problems:
676 : * a. transaction deadlock among ext4_journal_start,
677 : * ->write_begin via pagefault, and jbd2_journal_commit
678 : * b. racing with ->read_folio, ->write_begin, and
679 : * ext4_get_block in move_extent_per_page
680 : */
681 0 : ext4_double_up_write_data_sem(orig_inode, donor_inode);
682 : /* Swap original branches with new branches */
683 0 : move_extent_per_page(o_filp, donor_inode,
684 : orig_page_index, donor_page_index,
685 : offset_in_page, cur_len,
686 : unwritten, &ret);
687 0 : ext4_double_down_write_data_sem(orig_inode, donor_inode);
688 0 : if (ret < 0)
689 : break;
690 0 : o_start += cur_len;
691 0 : d_start += cur_len;
692 : }
693 0 : *moved_len = o_start - orig_blk;
694 0 : if (*moved_len > len)
695 0 : *moved_len = len;
696 :
697 0 : out:
698 0 : if (*moved_len) {
699 0 : ext4_discard_preallocations(orig_inode, 0);
700 0 : ext4_discard_preallocations(donor_inode, 0);
701 : }
702 :
703 0 : ext4_free_ext_path(path);
704 0 : ext4_double_up_write_data_sem(orig_inode, donor_inode);
705 0 : unlock_two_nondirectories(orig_inode, donor_inode);
706 :
707 0 : return ret;
708 : }
|