LCOV - code coverage report
Current view: top level - fs/btrfs - extent_map.c (source / functions)
Test: fstests of 6.5.0-rc3-djwa @ Mon Jul 31 20:08:17 PDT 2023
Date: 2023-07-31 20:08:17

                  Hit    Total    Coverage
Lines:              3      501      0.6 %
Functions:          1       26      3.8 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : 
       3             : #include <linux/err.h>
       4             : #include <linux/slab.h>
       5             : #include <linux/spinlock.h>
       6             : #include "messages.h"
       7             : #include "ctree.h"
       8             : #include "volumes.h"
       9             : #include "extent_map.h"
      10             : #include "compression.h"
      11             : #include "btrfs_inode.h"
      12             : 
      13             : 
      14             : static struct kmem_cache *extent_map_cache;
      15             : 
      16           2 : int __init extent_map_init(void)
      17             : {
      18           2 :         extent_map_cache = kmem_cache_create("btrfs_extent_map",
      19             :                         sizeof(struct extent_map), 0,
      20             :                         SLAB_MEM_SPREAD, NULL);
      21           2 :         if (!extent_map_cache)
      22           0 :                 return -ENOMEM;
      23             :         return 0;
      24             : }
      25             : 
      26           0 : void __cold extent_map_exit(void)
      27             : {
      28           0 :         kmem_cache_destroy(extent_map_cache);
      29           0 : }
      30             : 
      31             : /*
      32             :  * Initialize the extent tree @tree.  Should be called for each new inode or
      33             :  * other user of the extent_map interface.
      34             :  */
      35           0 : void extent_map_tree_init(struct extent_map_tree *tree)
      36             : {
      37           0 :         tree->map = RB_ROOT_CACHED;
      38           0 :         INIT_LIST_HEAD(&tree->modified_extents);
      39           0 :         rwlock_init(&tree->lock);
      40           0 : }
      41             : 
      42             : /*
      43             :  * Allocate a new extent_map structure.  The new structure is returned with a
      44             :  * reference count of one and needs to be freed using free_extent_map().
      45             :  */
      46           0 : struct extent_map *alloc_extent_map(void)
      47             : {
      48           0 :         struct extent_map *em;
      49           0 :         em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS);
      50           0 :         if (!em)
      51             :                 return NULL;
      52           0 :         RB_CLEAR_NODE(&em->rb_node);
      53           0 :         em->compress_type = BTRFS_COMPRESS_NONE;
      54           0 :         refcount_set(&em->refs, 1);
      55           0 :         INIT_LIST_HEAD(&em->list);
      56           0 :         return em;
      57             : }
      58             : 
      59             : /*
      60             :  * Drop the reference count on @em by one and free the structure if the reference
      61             :  * count hits zero.
      62             :  */
      63           0 : void free_extent_map(struct extent_map *em)
      64             : {
      65           0 :         if (!em)
      66             :                 return;
      67           0 :         if (refcount_dec_and_test(&em->refs)) {
      68           0 :                 WARN_ON(extent_map_in_tree(em));
      69           0 :                 WARN_ON(!list_empty(&em->list));
      70           0 :                 if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
      71           0 :                         kfree(em->map_lookup);
      72           0 :                 kmem_cache_free(extent_map_cache, em);
      73             :         }
      74             : }
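
A minimal lifecycle sketch for the two helpers above (hypothetical caller,
illustrative values):

        static int extent_map_lifecycle_sketch(void)
        {
                struct extent_map *em;

                em = alloc_extent_map();        /* refcount == 1 */
                if (!em)
                        return -ENOMEM;

                em->start = 0;
                em->len = 4096;                 /* illustrative 4K extent */

                free_extent_map(em);            /* refcount hits 0, em is freed */
                return 0;
        }
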
      75             : 
      76             : /* Do the math around the end of an extent, handling wrapping. */
      77             : static u64 range_end(u64 start, u64 len)
      78             : {
      79           0 :         if (start + len < start)
      80           0 :                 return (u64)-1;
      81             :         return start + len;
      82             : }
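
Worked examples for range_end(), with illustrative values:

        range_end(4096, 4096)       == 8192     /* no wrap */
        range_end((u64)-4096, 8192) == (u64)-1  /* the sum wraps below start */
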
      83             : 
      84           0 : static int tree_insert(struct rb_root_cached *root, struct extent_map *em)
      85             : {
      86           0 :         struct rb_node **p = &root->rb_root.rb_node;
      87           0 :         struct rb_node *parent = NULL;
      88           0 :         struct extent_map *entry = NULL;
      89           0 :         struct rb_node *orig_parent = NULL;
      90           0 :         u64 end = range_end(em->start, em->len);
      91           0 :         bool leftmost = true;
      92             : 
      93           0 :         while (*p) {
      94           0 :                 parent = *p;
      95           0 :                 entry = rb_entry(parent, struct extent_map, rb_node);
      96             : 
      97           0 :                 if (em->start < entry->start) {
      98           0 :                         p = &(*p)->rb_left;
      99           0 :                 } else if (em->start >= extent_map_end(entry)) {
     100           0 :                         p = &(*p)->rb_right;
     101           0 :                         leftmost = false;
     102             :                 } else {
     103             :                         return -EEXIST;
     104             :                 }
     105             :         }
     106             : 
     107             :         orig_parent = parent;
     108           0 :         while (parent && em->start >= extent_map_end(entry)) {
     109           0 :                 parent = rb_next(parent);
     110           0 :                 entry = rb_entry(parent, struct extent_map, rb_node);
     111             :         }
     112           0 :         if (parent)
     113           0 :                 if (end > entry->start && em->start < extent_map_end(entry))
     114             :                         return -EEXIST;
     115             : 
     116             :         parent = orig_parent;
     117             :         entry = rb_entry(parent, struct extent_map, rb_node);
     118           0 :         while (parent && em->start < entry->start) {
     119           0 :                 parent = rb_prev(parent);
     120           0 :                 entry = rb_entry(parent, struct extent_map, rb_node);
     121             :         }
     122           0 :         if (parent)
     123           0 :                 if (end > entry->start && em->start < extent_map_end(entry))
     124             :                         return -EEXIST;
     125             : 
     126           0 :         rb_link_node(&em->rb_node, orig_parent, p);
     127           0 :         rb_insert_color_cached(&em->rb_node, root, leftmost);
     128           0 :         return 0;
     129             : }
     130             : 
     131             : /*
     132             :  * Search through the tree for an extent_map with a given offset.  If it can't
     133             :  * be found, try to find some neighboring extents.
     134             :  */
     135           0 : static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
     136             :                                      struct rb_node **prev_or_next_ret)
     137             : {
     138           0 :         struct rb_node *n = root->rb_node;
     139           0 :         struct rb_node *prev = NULL;
     140           0 :         struct rb_node *orig_prev = NULL;
     141           0 :         struct extent_map *entry;
     142           0 :         struct extent_map *prev_entry = NULL;
     143             : 
     144           0 :         ASSERT(prev_or_next_ret);
     145             : 
     146           0 :         while (n) {
     147           0 :                 entry = rb_entry(n, struct extent_map, rb_node);
     148           0 :                 prev = n;
     149           0 :                 prev_entry = entry;
     150             : 
     151           0 :                 if (offset < entry->start)
     152           0 :                         n = n->rb_left;
     153           0 :                 else if (offset >= extent_map_end(entry))
     154           0 :                         n = n->rb_right;
     155             :                 else
     156           0 :                         return n;
     157             :         }
     158             : 
     159             :         orig_prev = prev;
     160           0 :         while (prev && offset >= extent_map_end(prev_entry)) {
     161           0 :                 prev = rb_next(prev);
     162           0 :                 prev_entry = rb_entry(prev, struct extent_map, rb_node);
     163             :         }
     164             : 
     165             :         /*
     166             :          * A previous extent map was found; return it, as in this case the
     167             :          * caller does not care about the next one.
     168             :          */
     169           0 :         if (prev) {
     170           0 :                 *prev_or_next_ret = prev;
     171           0 :                 return NULL;
     172             :         }
     173             : 
     174             :         prev = orig_prev;
     175             :         prev_entry = rb_entry(prev, struct extent_map, rb_node);
     176           0 :         while (prev && offset < prev_entry->start) {
     177           0 :                 prev = rb_prev(prev);
     178           0 :                 prev_entry = rb_entry(prev, struct extent_map, rb_node);
     179             :         }
     180           0 :         *prev_or_next_ret = prev;
     181             : 
     182           0 :         return NULL;
     183             : }
     184             : 
     185             : /* Check to see if two extent_map structs are adjacent and safe to merge. */
     186           0 : static int mergable_maps(struct extent_map *prev, struct extent_map *next)
     187             : {
     188           0 :         if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
     189             :                 return 0;
     190             : 
     191             :         /*
     192             :          * Don't merge compressed extents, we need to know their
     193             :          * actual size.
     194             :          */
     195           0 :         if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
     196             :                 return 0;
     197             : 
     198           0 :         if (test_bit(EXTENT_FLAG_LOGGING, &prev->flags) ||
     199           0 :             test_bit(EXTENT_FLAG_LOGGING, &next->flags))
     200             :                 return 0;
     201             : 
     202             :         /*
     203             :          * We don't want to merge stuff that hasn't been written to the log yet
     204             :          * since it may not reflect exactly what is on disk, and that would be
     205             :          * bad.
     206             :          */
     207           0 :         if (!list_empty(&prev->list) || !list_empty(&next->list))
     208             :                 return 0;
     209             : 
     210           0 :         ASSERT(next->block_start != EXTENT_MAP_DELALLOC &&
     211             :                prev->block_start != EXTENT_MAP_DELALLOC);
     212             : 
     213           0 :         if (prev->map_lookup || next->map_lookup)
     214           0 :                 ASSERT(test_bit(EXTENT_FLAG_FS_MAPPING, &prev->flags) &&
     215             :                        test_bit(EXTENT_FLAG_FS_MAPPING, &next->flags));
     216             : 
     217           0 :         if (extent_map_end(prev) == next->start &&
     218           0 :             prev->flags == next->flags &&
     219           0 :             prev->map_lookup == next->map_lookup &&
     220           0 :             ((next->block_start == EXTENT_MAP_HOLE &&
     221           0 :               prev->block_start == EXTENT_MAP_HOLE) ||
     222           0 :              (next->block_start == EXTENT_MAP_INLINE &&
     223           0 :               prev->block_start == EXTENT_MAP_INLINE) ||
     224           0 :              (next->block_start < EXTENT_MAP_LAST_BYTE - 1 &&
     225             :               next->block_start == extent_map_block_end(prev)))) {
     226           0 :                 return 1;
     227             :         }
     228             :         return 0;
     229             : }
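
For illustration (hypothetical values): two unpinned, uncompressed mappings
with identical flags, empty modified lists, and

        prev: start = 0,    len = 4096, block_start = 1048576
        next: start = 4096, len = 4096, block_start = 1052672

pass the final test, since extent_map_end(prev) == next->start and
extent_map_block_end(prev) == next->block_start, so mergable_maps() returns 1
and the pair can be collapsed into a single [0, 8192) mapping.
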
     230             : 
     231           0 : static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
     232             : {
     233           0 :         struct extent_map *merge = NULL;
     234           0 :         struct rb_node *rb;
     235             : 
     236             :         /*
     237             :          * We can't modify an extent map that is in the tree and that is being
     238             :          * used by another task, as it can cause that other task to see it in
     239             :          * inconsistent state during the merging. We always have 1 reference for
     240             :          * the tree and 1 for this task (which is unpinning the extent map or
     241             :          * clearing the logging flag), so anything > 2 means it's being used by
     242             :          * other tasks too.
     243             :          */
     244           0 :         if (refcount_read(&em->refs) > 2)
     245             :                 return;
     246             : 
     247           0 :         if (em->start != 0) {
     248           0 :                 rb = rb_prev(&em->rb_node);
     249           0 :                 if (rb)
     250           0 :                         merge = rb_entry(rb, struct extent_map, rb_node);
     251           0 :                 if (rb && mergable_maps(merge, em)) {
     252           0 :                         em->start = merge->start;
     253           0 :                         em->orig_start = merge->orig_start;
     254           0 :                         em->len += merge->len;
     255           0 :                         em->block_len += merge->block_len;
     256           0 :                         em->block_start = merge->block_start;
     257           0 :                         em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start;
     258           0 :                         em->mod_start = merge->mod_start;
     259           0 :                         em->generation = max(em->generation, merge->generation);
     260           0 :                         set_bit(EXTENT_FLAG_MERGED, &em->flags);
     261             : 
     262           0 :                         rb_erase_cached(&merge->rb_node, &tree->map);
     263           0 :                         RB_CLEAR_NODE(&merge->rb_node);
     264           0 :                         free_extent_map(merge);
     265             :                 }
     266             :         }
     267             : 
     268           0 :         rb = rb_next(&em->rb_node);
     269           0 :         if (rb)
     270           0 :                 merge = rb_entry(rb, struct extent_map, rb_node);
     271           0 :         if (rb && mergable_maps(em, merge)) {
     272           0 :                 em->len += merge->len;
     273           0 :                 em->block_len += merge->block_len;
     274           0 :                 rb_erase_cached(&merge->rb_node, &tree->map);
     275           0 :                 RB_CLEAR_NODE(&merge->rb_node);
     276           0 :                 em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
     277           0 :                 em->generation = max(em->generation, merge->generation);
     278           0 :                 set_bit(EXTENT_FLAG_MERGED, &em->flags);
     279           0 :                 free_extent_map(merge);
     280             :         }
     281             : }
     282             : 
     283             : /*
     284             :  * Unpin an extent from the cache.
     285             :  *
     286             :  * @tree:       tree to unpin the extent in
     287             :  * @start:      logical offset in the file
     288             :  * @len:        length of the extent
     289             :  * @gen:        generation that this extent has been modified in
     290             :  *
     291             :  * Called after an extent has been written to disk properly.  Set the generation
     292             :  * to the generation that actually added the file item to the inode so we know
     293             :  * we need to sync this extent when we call fsync().
     294             :  */
     295           0 : int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
     296             :                        u64 gen)
     297             : {
     298           0 :         int ret = 0;
     299           0 :         struct extent_map *em;
     300           0 :         bool prealloc = false;
     301             : 
     302           0 :         write_lock(&tree->lock);
     303           0 :         em = lookup_extent_mapping(tree, start, len);
     304             : 
     305           0 :         WARN_ON(!em || em->start != start);
     306             : 
     307           0 :         if (!em)
     308           0 :                 goto out;
     309             : 
     310           0 :         em->generation = gen;
     311           0 :         clear_bit(EXTENT_FLAG_PINNED, &em->flags);
     312           0 :         em->mod_start = em->start;
     313           0 :         em->mod_len = em->len;
     314             : 
     315           0 :         if (test_bit(EXTENT_FLAG_FILLING, &em->flags)) {
     316           0 :                 prealloc = true;
     317           0 :                 clear_bit(EXTENT_FLAG_FILLING, &em->flags);
     318             :         }
     319             : 
     320           0 :         try_merge_map(tree, em);
     321             : 
     322           0 :         if (prealloc) {
     323           0 :                 em->mod_start = em->start;
     324           0 :                 em->mod_len = em->len;
     325             :         }
     326             : 
     327           0 :         free_extent_map(em);
     328           0 : out:
     329           0 :         write_unlock(&tree->lock);
     330           0 :         return ret;
     331             : 
     332             : }
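
A hedged sketch of the call shape (the real callers live on the ordered
extent completion path; the wrapper name here is hypothetical):

        static void unpin_after_writeback_sketch(struct extent_map_tree *tree,
                                                 u64 start, u64 len, u64 transid)
        {
                /* transid: the transaction that added the file extent item */
                unpin_extent_cache(tree, start, len, transid);
        }
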
     333             : 
     334           0 : void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
     335             : {
     336           0 :         lockdep_assert_held_write(&tree->lock);
     337             : 
     338           0 :         clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
     339           0 :         if (extent_map_in_tree(em))
     340           0 :                 try_merge_map(tree, em);
     341           0 : }
     342             : 
     343           0 : static inline void setup_extent_mapping(struct extent_map_tree *tree,
     344             :                                         struct extent_map *em,
     345             :                                         int modified)
     346             : {
     347           0 :         refcount_inc(&em->refs);
     348           0 :         em->mod_start = em->start;
     349           0 :         em->mod_len = em->len;
     350             : 
     351           0 :         if (modified)
     352           0 :                 list_move(&em->list, &tree->modified_extents);
     353             :         else
     354           0 :                 try_merge_map(tree, em);
     355           0 : }
     356             : 
     357           0 : static void extent_map_device_set_bits(struct extent_map *em, unsigned bits)
     358             : {
     359           0 :         struct map_lookup *map = em->map_lookup;
     360           0 :         u64 stripe_size = em->orig_block_len;
     361           0 :         int i;
     362             : 
     363           0 :         for (i = 0; i < map->num_stripes; i++) {
     364           0 :                 struct btrfs_io_stripe *stripe = &map->stripes[i];
     365           0 :                 struct btrfs_device *device = stripe->dev;
     366             : 
     367           0 :                 set_extent_bit(&device->alloc_state, stripe->physical,
     368           0 :                                stripe->physical + stripe_size - 1,
     369             :                                bits | EXTENT_NOWAIT, NULL);
     370             :         }
     371           0 : }
     372             : 
     373           0 : static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
     374             : {
     375           0 :         struct map_lookup *map = em->map_lookup;
     376           0 :         u64 stripe_size = em->orig_block_len;
     377           0 :         int i;
     378             : 
     379           0 :         for (i = 0; i < map->num_stripes; i++) {
     380           0 :                 struct btrfs_io_stripe *stripe = &map->stripes[i];
     381           0 :                 struct btrfs_device *device = stripe->dev;
     382             : 
     383           0 :                 __clear_extent_bit(&device->alloc_state, stripe->physical,
     384           0 :                                    stripe->physical + stripe_size - 1,
     385             :                                    bits | EXTENT_NOWAIT,
     386             :                                    NULL, NULL);
     387             :         }
     388           0 : }
     389             : 
     390             : /*
     391             :  * Add new extent map to the extent tree
     392             :  *
     393             :  * @tree:       tree to insert new map in
     394             :  * @em:         map to insert
     395             :  * @modified:   indicate whether the given @em should be added to the
     396             :  *              modified list, which indicates the extent needs to be logged
     397             :  *
     398             :  * Insert @em into @tree or perform a simple forward/backward merge with
     399             :  * existing mappings.  The extent_map struct passed in is inserted into the
     400             :  * tree directly, with an additional reference taken for the tree; any
     401             :  * neighbor merged into it has its reference dropped.
     402             :  */
     403           0 : int add_extent_mapping(struct extent_map_tree *tree,
     404             :                        struct extent_map *em, int modified)
     405             : {
     406           0 :         int ret = 0;
     407             : 
     408           0 :         lockdep_assert_held_write(&tree->lock);
     409             : 
     410           0 :         ret = tree_insert(&tree->map, em);
     411           0 :         if (ret)
     412           0 :                 goto out;
     413             : 
     414           0 :         setup_extent_mapping(tree, em, modified);
     415           0 :         if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) {
     416           0 :                 extent_map_device_set_bits(em, CHUNK_ALLOCATED);
     417           0 :                 extent_map_device_clear_bits(em, CHUNK_TRIMMED);
     418             :         }
     419           0 : out:
     420           0 :         return ret;
     421             : }
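
A minimal insertion sketch, assuming a hypothetical caller that has just
built @em (the write lock is required by the lockdep assertion above):

        static int insert_mapping_sketch(struct extent_map_tree *tree,
                                         struct extent_map *em)
        {
                int ret;

                write_lock(&tree->lock);
                ret = add_extent_mapping(tree, em, 0);  /* 0: no fsync tracking */
                write_unlock(&tree->lock);

                /*
                 * On success the tree holds its own reference, so dropping ours
                 * keeps em alive; on -EEXIST this frees em.
                 */
                free_extent_map(em);
                return ret;
        }
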
     422             : 
     423             : static struct extent_map *
     424           0 : __lookup_extent_mapping(struct extent_map_tree *tree,
     425             :                         u64 start, u64 len, int strict)
     426             : {
     427           0 :         struct extent_map *em;
     428           0 :         struct rb_node *rb_node;
     429           0 :         struct rb_node *prev_or_next = NULL;
     430           0 :         u64 end = range_end(start, len);
     431             : 
     432           0 :         rb_node = __tree_search(&tree->map.rb_root, start, &prev_or_next);
     433           0 :         if (!rb_node) {
     434           0 :                 if (prev_or_next)
     435             :                         rb_node = prev_or_next;
     436             :                 else
     437             :                         return NULL;
     438             :         }
     439             : 
     440           0 :         em = rb_entry(rb_node, struct extent_map, rb_node);
     441             : 
     442           0 :         if (strict && !(end > em->start && start < extent_map_end(em)))
     443             :                 return NULL;
     444             : 
     445           0 :         refcount_inc(&em->refs);
     446           0 :         return em;
     447             : }
     448             : 
     449             : /*
     450             :  * Lookup an extent_map that intersects the @start + @len range.
     451             :  *
     452             :  * @tree:       tree to lookup in
     453             :  * @start:      byte offset to start the search
     454             :  * @len:        length of the lookup range
     455             :  *
     456             :  * Find and return the first extent_map struct in @tree that intersects the
     457             :  * [start, start + len) range.  There may be additional objects in the tree that
     458             :  * intersect, so check the object returned carefully to make sure that no
     459             :  * additional lookups are needed.
     460             :  */
     461           0 : struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
     462             :                                          u64 start, u64 len)
     463             : {
     464           0 :         return __lookup_extent_mapping(tree, start, len, 1);
     465             : }
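
A lookup sketch that follows the advice above to check the returned object
(hypothetical helper):

        static bool range_fully_mapped_sketch(struct extent_map_tree *tree,
                                              u64 start, u64 len)
        {
                struct extent_map *em;
                bool covered = false;

                read_lock(&tree->lock);
                em = lookup_extent_mapping(tree, start, len);
                read_unlock(&tree->lock);

                if (em) {
                        /* em only intersects the range; check full coverage. */
                        covered = em->start <= start &&
                                  extent_map_end(em) >= start + len;
                        free_extent_map(em);    /* drop the lookup reference */
                }
                return covered;
        }
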
     466             : 
     467             : /*
     468             :  * Find a nearby extent map intersecting @start + @len (not an exact search).
     469             :  *
     470             :  * @tree:       tree to lookup in
     471             :  * @start:      byte offset to start the search
     472             :  * @len:        length of the lookup range
     473             :  *
     474             :  * Find and return the first extent_map struct in @tree that intersects the
     475             :  * [start, start + len) range.
     476             :  *
     477             :  * If one can't be found, a nearby extent may be returned instead.
     478             :  */
     479           0 : struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
     480             :                                          u64 start, u64 len)
     481             : {
     482           0 :         return __lookup_extent_mapping(tree, start, len, 0);
     483             : }
     484             : 
     485             : /*
     486             :  * Remove an extent_map from the extent tree.
     487             :  *
     488             :  * @tree:       extent tree to remove from
     489             :  * @em:         extent map being removed
     490             :  *
     491             :  * Remove @em from @tree.  No reference counts are dropped, and no checks
     492             :  * are done to see if the range is in use.
     493             :  */
     494           0 : void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
     495             : {
     496           0 :         lockdep_assert_held_write(&tree->lock);
     497             : 
     498           0 :         WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
     499           0 :         rb_erase_cached(&em->rb_node, &tree->map);
     500           0 :         if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
     501           0 :                 list_del_init(&em->list);
     502           0 :         if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
     503           0 :                 extent_map_device_clear_bits(em, CHUNK_ALLOCATED);
     504           0 :         RB_CLEAR_NODE(&em->rb_node);
     505           0 : }
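
Because remove_extent_mapping() drops no references, a removal sketch pairs
it with one free_extent_map() for the reference the tree held (hypothetical
caller):

        static void remove_mapping_sketch(struct extent_map_tree *tree,
                                          struct extent_map *em)
        {
                write_lock(&tree->lock);
                remove_extent_mapping(tree, em);
                write_unlock(&tree->lock);
                free_extent_map(em);    /* once for the tree's reference */
        }
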
     506             : 
     507           0 : static void replace_extent_mapping(struct extent_map_tree *tree,
     508             :                                    struct extent_map *cur,
     509             :                                    struct extent_map *new,
     510             :                                    int modified)
     511             : {
     512           0 :         lockdep_assert_held_write(&tree->lock);
     513             : 
     514           0 :         WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags));
     515           0 :         ASSERT(extent_map_in_tree(cur));
     516           0 :         if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags))
     517           0 :                 list_del_init(&cur->list);
     518           0 :         rb_replace_node_cached(&cur->rb_node, &new->rb_node, &tree->map);
     519           0 :         RB_CLEAR_NODE(&cur->rb_node);
     520             : 
     521           0 :         setup_extent_mapping(tree, new, modified);
     522           0 : }
     523             : 
     524             : static struct extent_map *next_extent_map(const struct extent_map *em)
     525             : {
     526           0 :         struct rb_node *next;
     527             : 
     528           0 :         next = rb_next(&em->rb_node);
     529           0 :         if (!next)
     530           0 :                 return NULL;
     531             :         return container_of(next, struct extent_map, rb_node);
     532             : }
     533             : 
     534             : static struct extent_map *prev_extent_map(struct extent_map *em)
     535             : {
     536           0 :         struct rb_node *prev;
     537             : 
     538           0 :         prev = rb_prev(&em->rb_node);
     539           0 :         if (!prev)
     540           0 :                 return NULL;
     541             :         return container_of(prev, struct extent_map, rb_node);
     542             : }
     543             : 
     544             : /*
     545             :  * Helper for btrfs_get_extent.  Given an existing extent in the tree (the
     546             :  * extent nearest to map_start) and an extent that we want to insert, deal
     547             :  * with the overlap and insert the best-fitting trimmed version of the new
     548             :  * extent into the tree.
     549             :  */
     550           0 : static noinline int merge_extent_mapping(struct extent_map_tree *em_tree,
     551             :                                          struct extent_map *existing,
     552             :                                          struct extent_map *em,
     553             :                                          u64 map_start)
     554             : {
     555           0 :         struct extent_map *prev;
     556           0 :         struct extent_map *next;
     557           0 :         u64 start;
     558           0 :         u64 end;
     559           0 :         u64 start_diff;
     560             : 
     561           0 :         BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
     562             : 
     563           0 :         if (existing->start > map_start) {
     564           0 :                 next = existing;
     565           0 :                 prev = prev_extent_map(next);
     566             :         } else {
     567           0 :                 prev = existing;
     568           0 :                 next = next_extent_map(prev);
     569             :         }
     570             : 
     571           0 :         start = prev ? extent_map_end(prev) : em->start;
     572           0 :         start = max_t(u64, start, em->start);
     573           0 :         end = next ? next->start : extent_map_end(em);
     574           0 :         end = min_t(u64, end, extent_map_end(em));
     575           0 :         start_diff = start - em->start;
     576           0 :         em->start = start;
     577           0 :         em->len = end - start;
     578           0 :         if (em->block_start < EXTENT_MAP_LAST_BYTE &&
     579           0 :             !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
     580           0 :                 em->block_start += start_diff;
     581           0 :                 em->block_len = em->len;
     582             :         }
     583           0 :         return add_extent_mapping(em_tree, em, 0);
     584             : }
     585             : 
     586             : /*
     587             :  * Add extent mapping into em_tree.
     588             :  *
     589             :  * @fs_info:  the filesystem
     590             :  * @em_tree:  extent tree into which we want to insert the extent mapping
     591             :  * @em_in:    extent we are inserting
     592             :  * @start:    start of the logical range btrfs_get_extent() is requesting
     593             :  * @len:      length of the logical range btrfs_get_extent() is requesting
     594             :  *
     595             :  * Note that @em_in's range may be different from [start, start+len),
     596             :  * but the two ranges must overlap.
     597             :  *
     598             :  * Insert @em_in into @em_tree. In case there is an overlapping range, handle
     599             :  * the -EEXIST by either:
     600             :  * a) Returning the existing extent in @em_in if @start is within the
     601             :  *    existing em.
     602             :  * b) Merging the existing extent with the @em_in passed in.
     603             :  *
     604             :  * Return 0 on success, otherwise -EEXIST.
     605             :  *
     606             :  */
     607           0 : int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
     608             :                              struct extent_map_tree *em_tree,
     609             :                              struct extent_map **em_in, u64 start, u64 len)
     610             : {
     611           0 :         int ret;
     612           0 :         struct extent_map *em = *em_in;
     613             : 
     614             :         /*
     615             :          * Tree-checker should have rejected any inline extent with non-zero
     616             :          * file offset.  Here we just do a sanity check.
     617             :          */
     618           0 :         if (em->block_start == EXTENT_MAP_INLINE)
     619             :                 ASSERT(em->start == 0);
     620             : 
     621           0 :         ret = add_extent_mapping(em_tree, em, 0);
     622             :         /* It is possible that someone inserted the extent into the tree
     623             :          * while we had the lock dropped.  It is also possible that an
     624             :          * overlapping map exists in the tree.
     625             :          */
     626           0 :         if (ret == -EEXIST) {
     627           0 :                 struct extent_map *existing;
     628             : 
     629           0 :                 ret = 0;
     630             : 
     631           0 :                 existing = search_extent_mapping(em_tree, start, len);
     632             : 
     633           0 :                 trace_btrfs_handle_em_exist(fs_info, existing, em, start, len);
     634             : 
     635             :                 /*
     636             :                  * existing will always be non-NULL, since there must be an
     637             :                  * extent causing the -EEXIST.
     638             :                  */
     639           0 :                 if (start >= existing->start &&
     640             :                     start < extent_map_end(existing)) {
     641           0 :                         free_extent_map(em);
     642           0 :                         *em_in = existing;
     643           0 :                         ret = 0;
     644             :                 } else {
     645           0 :                         u64 orig_start = em->start;
     646           0 :                         u64 orig_len = em->len;
     647             : 
     648             :                         /*
     649             :                          * The existing extent map is the one nearest to
     650             :                          * the [start, start + len) range that overlaps it.
     651             :                          */
     652           0 :                         ret = merge_extent_mapping(em_tree, existing,
     653             :                                                    em, start);
     654           0 :                         if (ret) {
     655           0 :                                 free_extent_map(em);
     656           0 :                                 *em_in = NULL;
     657           0 :                                 WARN_ONCE(ret,
     658             : "unexpected error %d: merge existing(start %llu len %llu) with em(start %llu len %llu)\n",
     659             :                                           ret, existing->start, existing->len,
     660             :                                           orig_start, orig_len);
     661             :                         }
     662           0 :                         free_extent_map(existing);
     663             :                 }
     664             :         }
     665             : 
     666           0 :         ASSERT(ret == 0 || ret == -EEXIST);
     667           0 :         return ret;
     668             : }
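
Sketch of the resulting contract for callers, with locking that mirrors
btrfs_get_extent() (variable names illustrative):

        write_lock(&em_tree->lock);
        ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
        write_unlock(&em_tree->lock);

On return, ret == 0 means em points to a referenced extent map: the one
passed in, the pre-existing overlapping one, or the passed-in map trimmed by
merge_extent_mapping().  On -EEXIST, em is NULL and the passed-in map has
been freed.
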
     669             : 
     670             : /*
     671             :  * Drop all extent maps from a tree in the fastest possible way, rescheduling
     672             :  * if needed. This avoids searching the tree, from the root down to the first
     673             :  * extent map, before each deletion.
     674             :  */
     675           0 : static void drop_all_extent_maps_fast(struct extent_map_tree *tree)
     676             : {
     677           0 :         write_lock(&tree->lock);
     678           0 :         while (!RB_EMPTY_ROOT(&tree->map.rb_root)) {
     679           0 :                 struct extent_map *em;
     680           0 :                 struct rb_node *node;
     681             : 
     682           0 :                 node = rb_first_cached(&tree->map);
     683           0 :                 em = rb_entry(node, struct extent_map, rb_node);
     684           0 :                 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
     685           0 :                 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
     686           0 :                 remove_extent_mapping(tree, em);
     687           0 :                 free_extent_map(em);
     688           0 :                 cond_resched_rwlock_write(&tree->lock);
     689             :         }
     690           0 :         write_unlock(&tree->lock);
     691           0 : }
     692             : 
     693             : /*
     694             :  * Drop all extent maps in a given range.
     695             :  *
     696             :  * @inode:       The target inode.
     697             :  * @start:       Start offset of the range.
     698             :  * @end:         End offset of the range (inclusive value).
     699             :  * @skip_pinned: Indicate if pinned extent maps should be ignored or not.
     700             :  *
     701             :  * This drops all the extent maps that intersect the given range [@start, @end].
     702             :  * Extent maps that partially overlap the range and extend before or beyond
     703             :  * it are split.
     704             :  * The caller should have locked an appropriate file range in the inode's io
     705             :  * tree before calling this function.
     706             :  */
     707           0 : void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
     708             :                                  bool skip_pinned)
     709             : {
     710           0 :         struct extent_map *split;
     711           0 :         struct extent_map *split2;
     712           0 :         struct extent_map *em;
     713           0 :         struct extent_map_tree *em_tree = &inode->extent_tree;
     714           0 :         u64 len = end - start + 1;
     715             : 
     716           0 :         WARN_ON(end < start);
     717           0 :         if (end == (u64)-1) {
     718           0 :                 if (start == 0 && !skip_pinned) {
     719           0 :                         drop_all_extent_maps_fast(em_tree);
     720           0 :                         return;
     721             :                 }
     722             :                 len = (u64)-1;
     723             :         } else {
     724             :                 /* Make end offset exclusive for use in the loop below. */
     725           0 :                 end++;
     726             :         }
     727             : 
     728             :         /*
     729             :          * It's ok if we fail to allocate the extent maps, see the comment near
     730             :          * the bottom of the loop below. We only need two spare extent maps in
     731             :          * the worst case, where the first extent map that intersects our range
     732             :          * starts before the range and the last extent map that intersects our
     733             :          * range ends after our range (and they might be the same extent map),
     734             :          * because we need to split those two extent maps at the boundaries.
     735             :          */
     736           0 :         split = alloc_extent_map();
     737           0 :         split2 = alloc_extent_map();
     738             : 
     739           0 :         write_lock(&em_tree->lock);
     740           0 :         em = lookup_extent_mapping(em_tree, start, len);
     741             : 
     742           0 :         while (em) {
     743             :                 /* extent_map_end() returns exclusive value (last byte + 1). */
     744           0 :                 const u64 em_end = extent_map_end(em);
     745           0 :                 struct extent_map *next_em = NULL;
     746           0 :                 u64 gen;
     747           0 :                 unsigned long flags;
     748           0 :                 bool modified;
     749           0 :                 bool compressed;
     750             : 
     751           0 :                 if (em_end < end) {
     752           0 :                         next_em = next_extent_map(em);
     753           0 :                         if (next_em) {
     754           0 :                                 if (next_em->start < end)
     755           0 :                                         refcount_inc(&next_em->refs);
     756             :                                 else
     757             :                                         next_em = NULL;
     758             :                         }
     759             :                 }
     760             : 
     761           0 :                 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
     762           0 :                         start = em_end;
     763           0 :                         if (end != (u64)-1)
     764             :                                 len = start + len - em_end;
     765           0 :                         goto next;
     766             :                 }
     767             : 
     768           0 :                 flags = em->flags;
     769           0 :                 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
     770             :                 /*
     771             :                  * In case we split the extent map, we want to preserve the
     772             :                  * EXTENT_FLAG_LOGGING flag on our extent map, but we don't want
     773             :                  * it on the new extent maps.
     774             :                  */
     775           0 :                 clear_bit(EXTENT_FLAG_LOGGING, &flags);
     776           0 :                 modified = !list_empty(&em->list);
     777             : 
     778             :                 /*
     779             :                  * The extent map does not cross our target range, so no need to
     780             :                  * split it, we can remove it directly.
     781             :                  */
     782           0 :                 if (em->start >= start && em_end <= end)
     783           0 :                         goto remove_em;
     784             : 
     785           0 :                 gen = em->generation;
     786           0 :                 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
     787             : 
     788           0 :                 if (em->start < start) {
     789           0 :                         if (!split) {
     790           0 :                                 split = split2;
     791           0 :                                 split2 = NULL;
     792           0 :                                 if (!split)
     793           0 :                                         goto remove_em;
     794             :                         }
     795           0 :                         split->start = em->start;
     796           0 :                         split->len = start - em->start;
     797             : 
     798           0 :                         if (em->block_start < EXTENT_MAP_LAST_BYTE) {
     799           0 :                                 split->orig_start = em->orig_start;
     800           0 :                                 split->block_start = em->block_start;
     801             : 
     802           0 :                                 if (compressed)
     803           0 :                                         split->block_len = em->block_len;
     804             :                                 else
     805           0 :                                         split->block_len = split->len;
     806           0 :                                 split->orig_block_len = max(split->block_len,
     807             :                                                 em->orig_block_len);
     808           0 :                                 split->ram_bytes = em->ram_bytes;
     809             :                         } else {
     810           0 :                                 split->orig_start = split->start;
     811           0 :                                 split->block_len = 0;
     812           0 :                                 split->block_start = em->block_start;
     813           0 :                                 split->orig_block_len = 0;
     814           0 :                                 split->ram_bytes = split->len;
     815             :                         }
     816             : 
     817           0 :                         split->generation = gen;
     818           0 :                         split->flags = flags;
     819           0 :                         split->compress_type = em->compress_type;
     820           0 :                         replace_extent_mapping(em_tree, em, split, modified);
     821           0 :                         free_extent_map(split);
     822           0 :                         split = split2;
     823           0 :                         split2 = NULL;
     824             :                 }
     825           0 :                 if (em_end > end) {
     826           0 :                         if (!split) {
     827           0 :                                 split = split2;
     828           0 :                                 split2 = NULL;
     829           0 :                                 if (!split)
     830           0 :                                         goto remove_em;
     831             :                         }
     832           0 :                         split->start = start + len;
     833           0 :                         split->len = em_end - (start + len);
     834           0 :                         split->block_start = em->block_start;
     835           0 :                         split->flags = flags;
     836           0 :                         split->compress_type = em->compress_type;
     837           0 :                         split->generation = gen;
     838             : 
     839           0 :                         if (em->block_start < EXTENT_MAP_LAST_BYTE) {
     840           0 :                                 split->orig_block_len = max(em->block_len,
     841             :                                                     em->orig_block_len);
     842             : 
     843           0 :                                 split->ram_bytes = em->ram_bytes;
     844           0 :                                 if (compressed) {
     845           0 :                                         split->block_len = em->block_len;
     846           0 :                                         split->orig_start = em->orig_start;
     847             :                                 } else {
     848           0 :                                         const u64 diff = start + len - em->start;
     849             : 
     850           0 :                                         split->block_len = split->len;
     851           0 :                                         split->block_start += diff;
     852           0 :                                         split->orig_start = em->orig_start;
     853             :                                 }
     854             :                         } else {
     855           0 :                                 split->ram_bytes = split->len;
     856           0 :                                 split->orig_start = split->start;
     857           0 :                                 split->block_len = 0;
     858           0 :                                 split->orig_block_len = 0;
     859             :                         }
     860             : 
     861           0 :                         if (extent_map_in_tree(em)) {
     862           0 :                                 replace_extent_mapping(em_tree, em, split,
     863             :                                                        modified);
     864             :                         } else {
     865           0 :                                 int ret;
     866             : 
     867           0 :                                 ret = add_extent_mapping(em_tree, split,
     868             :                                                          modified);
     869             :                                 /* Logic error, shouldn't happen. */
     870           0 :                                 ASSERT(ret == 0);
     871           0 :                                 if (WARN_ON(ret != 0) && modified)
     872           0 :                                         btrfs_set_inode_full_sync(inode);
     873             :                         }
     874           0 :                         free_extent_map(split);
     875           0 :                         split = NULL;
     876             :                 }
     877           0 : remove_em:
     878           0 :                 if (extent_map_in_tree(em)) {
     879             :                         /*
     880             :                          * If the extent map is still in the tree it means that
     881             :                          * either of the following is true:
     882             :                          *
     883             :                          * 1) It fits entirely in our range (doesn't end beyond
     884             :                          *    it or starts before it);
     885             :                          *
     886             :                          * 2) It starts before our range and/or ends after our
     887             :                          *    range, and we were not able to allocate the extent
     888             :                          *    maps for split operations, @split and @split2.
     889             :                          *
     890             :                          * If we are at case 2) then we just remove the entire
     891             :                          * extent map - this is fine since anyone who needs to
     892             :                          * access the subranges outside our range will just
     893             :                          * load it again from the subvolume tree's file extent
     894             :                          * item. However if the extent map was in the list of
     895             :                          * modified extents, then we must mark the inode for a
     896             :                          * full fsync, otherwise a fast fsync will miss this
     897             :                          * extent if it's new and needs to be logged.
     898             :                          */
     899           0 :                         if ((em->start < start || em_end > end) && modified) {
     900           0 :                                 ASSERT(!split);
     901           0 :                                 btrfs_set_inode_full_sync(inode);
     902             :                         }
     903           0 :                         remove_extent_mapping(em_tree, em);
     904             :                 }
     905             : 
     906             :                 /*
     907             :                  * Once for the tree reference (we replaced or removed the
     908             :                  * extent map from the tree).
     909             :                  */
     910           0 :                 free_extent_map(em);
     911           0 : next:
     912             :                 /* Once for us (for our lookup reference). */
     913           0 :                 free_extent_map(em);
     914             : 
     915           0 :                 em = next_em;
     916             :         }
     917             : 
     918           0 :         write_unlock(&em_tree->lock);
     919             : 
     920           0 :         free_extent_map(split);
     921           0 :         free_extent_map(split2);
     922             : }
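
A hedged sketch of the locking contract described above; lock_extent() and
unlock_extent() are the io tree helpers used elsewhere in this file, while
the wrapper itself is hypothetical:

        static void drop_range_sketch(struct btrfs_inode *inode, u64 start,
                                      u64 end)
        {
                /* Lock the file range first, as required by the comment above. */
                lock_extent(&inode->io_tree, start, end, NULL);
                btrfs_drop_extent_map_range(inode, start, end, false);
                unlock_extent(&inode->io_tree, start, end, NULL);
        }
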
     923             : 
     924             : /*
     925             :  * Replace a range in the inode's extent map tree with a new extent map.
     926             :  *
     927             :  * @inode:      The target inode.
     928             :  * @new_em:     The new extent map to add to the inode's extent map tree.
     929             :  * @modified:   Indicate if the new extent map should be added to the list of
     930             :  *              modified extents (for fast fsync tracking).
     931             :  *
     932             :  * Drops all the extent maps in the inode's extent map tree that intersect the
     933             :  * range of the new extent map and adds the new extent map to the tree.
     934             :  * The caller should have locked an appropriate file range in the inode's io
     935             :  * tree before calling this function.
     936             :  */
     937           0 : int btrfs_replace_extent_map_range(struct btrfs_inode *inode,
     938             :                                    struct extent_map *new_em,
     939             :                                    bool modified)
     940             : {
     941           0 :         const u64 end = new_em->start + new_em->len - 1;
     942           0 :         struct extent_map_tree *tree = &inode->extent_tree;
     943           0 :         int ret;
     944             : 
     945           0 :         ASSERT(!extent_map_in_tree(new_em));
     946             : 
     947             :         /*
     948             :          * The caller has locked an appropriate file range in the inode's io
     949             :          * tree, but getting -EEXIST when adding the new extent map can still
     950             :          * happen in case there are extents that partially cover the range, and
     951             :          * this is due to two tasks operating on different parts of the extent.
     952             :          * See commit 18e83ac75bfe67 ("Btrfs: fix unexpected EEXIST from
     953             :          * btrfs_get_extent") for an example and details.
     954             :          */
     955           0 :         do {
     956           0 :                 btrfs_drop_extent_map_range(inode, new_em->start, end, false);
     957           0 :                 write_lock(&tree->lock);
     958           0 :                 ret = add_extent_mapping(tree, new_em, modified);
     959           0 :                 write_unlock(&tree->lock);
     960           0 :         } while (ret == -EEXIST);
     961             : 
     962           0 :         return ret;
     963             : }
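
A replace sketch, assuming a hypothetical caller that built @new_em and
already holds the io tree lock for its range:

        static int replace_mapping_sketch(struct btrfs_inode *inode,
                                          struct extent_map *new_em)
        {
                int ret;

                ret = btrfs_replace_extent_map_range(inode, new_em, true);
                free_extent_map(new_em); /* drop our ref; the tree kept its own */
                return ret;
        }
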
     964             : 
     965             : /*
     966             :  * Split off the first @pre bytes from the extent_map at [start, start + len),
     967             :  * and set its block_start to @new_logical.
     968             :  *
     969             :  * This function is used when an ordered_extent needs to be split.
     970             :  */
     971           0 : int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
     972             :                      u64 new_logical)
     973             : {
     974           0 :         struct extent_map_tree *em_tree = &inode->extent_tree;
     975           0 :         struct extent_map *em;
     976           0 :         struct extent_map *split_pre = NULL;
     977           0 :         struct extent_map *split_mid = NULL;
     978           0 :         int ret = 0;
     979           0 :         unsigned long flags;
     980             : 
     981           0 :         ASSERT(pre != 0);
     982           0 :         ASSERT(pre < len);
     983             : 
     984           0 :         split_pre = alloc_extent_map();
     985           0 :         if (!split_pre)
     986             :                 return -ENOMEM;
     987           0 :         split_mid = alloc_extent_map();
     988           0 :         if (!split_mid) {
     989           0 :                 ret = -ENOMEM;
     990           0 :                 goto out_free_pre;
     991             :         }
     992             : 
     993           0 :         lock_extent(&inode->io_tree, start, start + len - 1, NULL);
     994           0 :         write_lock(&em_tree->lock);
     995           0 :         em = lookup_extent_mapping(em_tree, start, len);
     996           0 :         if (!em) {
     997           0 :                 ret = -EIO;
     998           0 :                 goto out_unlock;
     999             :         }
    1000             : 
    1001           0 :         ASSERT(em->len == len);
    1002           0 :         ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
    1003           0 :         ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE);
    1004           0 :         ASSERT(test_bit(EXTENT_FLAG_PINNED, &em->flags));
    1005           0 :         ASSERT(!test_bit(EXTENT_FLAG_LOGGING, &em->flags));
    1006           0 :         ASSERT(!list_empty(&em->list));
    1007             : 
    1008           0 :         flags = em->flags;
    1009           0 :         clear_bit(EXTENT_FLAG_PINNED, &em->flags);
    1010             : 
    1011             :         /* First, replace the em with a new extent_map starting from em->start. */
    1012           0 :         split_pre->start = em->start;
    1013           0 :         split_pre->len = pre;
    1014           0 :         split_pre->orig_start = split_pre->start;
    1015           0 :         split_pre->block_start = new_logical;
    1016           0 :         split_pre->block_len = split_pre->len;
    1017           0 :         split_pre->orig_block_len = split_pre->block_len;
    1018           0 :         split_pre->ram_bytes = split_pre->len;
    1019           0 :         split_pre->flags = flags;
    1020           0 :         split_pre->compress_type = em->compress_type;
    1021           0 :         split_pre->generation = em->generation;
    1022             : 
    1023           0 :         replace_extent_mapping(em_tree, em, split_pre, 1);
    1024             : 
    1025             :         /*
    1026             :          * Now we only have an extent_map at:
    1027             :          *     [em->start, em->start + pre)
    1028             :          */
    1029             : 
    1030             :         /* Insert the middle extent_map. */
    1031           0 :         split_mid->start = em->start + pre;
    1032           0 :         split_mid->len = em->len - pre;
    1033           0 :         split_mid->orig_start = split_mid->start;
    1034           0 :         split_mid->block_start = em->block_start + pre;
    1035           0 :         split_mid->block_len = split_mid->len;
    1036           0 :         split_mid->orig_block_len = split_mid->block_len;
    1037           0 :         split_mid->ram_bytes = split_mid->len;
    1038           0 :         split_mid->flags = flags;
    1039           0 :         split_mid->compress_type = em->compress_type;
    1040           0 :         split_mid->generation = em->generation;
    1041           0 :         add_extent_mapping(em_tree, split_mid, 1);
    1042             : 
    1043             :         /* Once for us */
    1044           0 :         free_extent_map(em);
    1045             :         /* Once for the tree */
    1046           0 :         free_extent_map(em);
    1047             : 
    1048           0 : out_unlock:
    1049           0 :         write_unlock(&em_tree->lock);
    1050           0 :         unlock_extent(&inode->io_tree, start, start + len - 1, NULL);
    1051           0 :         free_extent_map(split_mid);
    1052           0 : out_free_pre:
    1053           0 :         free_extent_map(split_pre);
    1054           0 :         return ret;
    1055             : }
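
A usage sketch with illustrative values (the real caller is the ordered
extent splitting code): given a pinned, uncompressed 128K extent map at file
offset 1M, carve off the first 64K and point it at a new logical address:

        ret = split_extent_map(inode, SZ_1M, SZ_128K, SZ_64K, new_logical);

Afterwards the tree holds [1M, 1M + 64K) mapped to new_logical and
[1M + 64K, 1M + 128K) mapped to the original block_start + 64K.
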

Generated by: LCOV version 1.14