LCOV - code coverage report
Current view:  top level - fs/btrfs - extent_map.c (source / functions)
Test:          fstests of 6.5.0-rc3-djwx @ Mon Jul 31 20:08:22 PDT 2023
Date:          2023-07-31 20:08:22
                                                   Hit     Total    Coverage
Lines:                                             475     501      94.8 %
Functions:                                         25      26       96.2 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : 
       3             : #include <linux/err.h>
       4             : #include <linux/slab.h>
       5             : #include <linux/spinlock.h>
       6             : #include "messages.h"
       7             : #include "ctree.h"
       8             : #include "volumes.h"
       9             : #include "extent_map.h"
      10             : #include "compression.h"
      11             : #include "btrfs_inode.h"
      12             : 
      13             : 
      14             : static struct kmem_cache *extent_map_cache;
      15             : 
      16          11 : int __init extent_map_init(void)
      17             : {
      18          11 :         extent_map_cache = kmem_cache_create("btrfs_extent_map",
      19             :                         sizeof(struct extent_map), 0,
      20             :                         SLAB_MEM_SPREAD, NULL);
      21          11 :         if (!extent_map_cache)
      22           0 :                 return -ENOMEM;
      23             :         return 0;
      24             : }
      25             : 
      26           0 : void __cold extent_map_exit(void)
      27             : {
      28           0 :         kmem_cache_destroy(extent_map_cache);
      29           0 : }
      30             : 
      31             : /*
      32             :  * Initialize the extent tree @tree.  Should be called for each new inode or
      33             :  * other user of the extent_map interface.
      34             :  */
      35     3864913 : void extent_map_tree_init(struct extent_map_tree *tree)
      36             : {
      37     3864913 :         tree->map = RB_ROOT_CACHED;
      38     3864913 :         INIT_LIST_HEAD(&tree->modified_extents);
      39     3864913 :         rwlock_init(&tree->lock);
      40     3864540 : }
      41             : 
      42             : /*
      43             :  * Allocate a new extent_map structure.  The new structure is returned with a
      44             :  * reference count of one and needs to be freed using free_extent_map()
      45             :  */
      46    74240044 : struct extent_map *alloc_extent_map(void)
      47             : {
      48    74240044 :         struct extent_map *em;
      49    74240044 :         em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS);
      50    74258737 :         if (!em)
      51             :                 return NULL;
      52    74258737 :         RB_CLEAR_NODE(&em->rb_node);
      53    74258737 :         em->compress_type = BTRFS_COMPRESS_NONE;
      54    74258737 :         refcount_set(&em->refs, 1);
      55    74258737 :         INIT_LIST_HEAD(&em->list);
      56    74258737 :         return em;
      57             : }
      58             : 
      59             : /*
       60             :  * Drop the reference count on @em by one and free the structure if the reference
      61             :  * count hits zero.
      62             :  */
      63   310515895 : void free_extent_map(struct extent_map *em)
      64             : {
      65   310515895 :         if (!em)
      66             :                 return;
      67   299008640 :         if (refcount_dec_and_test(&em->refs)) {
      68    74271552 :                 WARN_ON(extent_map_in_tree(em));
      69    74271552 :                 WARN_ON(!list_empty(&em->list));
      70   148543104 :                 if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
      71       26375 :                         kfree(em->map_lookup);
      72    74271552 :                 kmem_cache_free(extent_map_cache, em);
      73             :         }
      74             : }
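
   [Editorial note] The two comments above describe the reference-count contract: alloc_extent_map() hands back a map with a single reference, and free_extent_map() drops one reference and only frees the structure once the count reaches zero. A minimal, hypothetical sketch of that pairing (example_map_lifetime and the 4 KiB hole are illustrative only, assuming the declarations from extent_map.h and SZ_4K from <linux/sizes.h>):

        static int example_map_lifetime(void)
        {
                struct extent_map *em;

                em = alloc_extent_map();        /* returned with refcount == 1 */
                if (!em)
                        return -ENOMEM;         /* GFP_NOFS allocation may fail */

                em->start = 0;
                em->len = SZ_4K;
                em->block_start = EXTENT_MAP_HOLE;

                /* ... typically handed to add_extent_mapping(), which takes its own ref ... */

                free_extent_map(em);            /* drops our reference; frees at zero */
                return 0;
        }
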
      75             : 
      76             : /* Do the math around the end of an extent, handling wrapping. */
      77             : static u64 range_end(u64 start, u64 len)
      78             : {
      79   195962037 :         if (start + len < start)
      80      170433 :                 return (u64)-1;
      81             :         return start + len;
      82             : }
      83             : 
      84    20314788 : static int tree_insert(struct rb_root_cached *root, struct extent_map *em)
      85             : {
      86    20314788 :         struct rb_node **p = &root->rb_root.rb_node;
      87    20314788 :         struct rb_node *parent = NULL;
      88    20314788 :         struct extent_map *entry = NULL;
      89    20314788 :         struct rb_node *orig_parent = NULL;
      90    20314788 :         u64 end = range_end(em->start, em->len);
      91    20314788 :         bool leftmost = true;
      92             : 
      93   334753152 :         while (*p) {
      94   314438739 :                 parent = *p;
      95   314438739 :                 entry = rb_entry(parent, struct extent_map, rb_node);
      96             : 
      97   314438739 :                 if (em->start < entry->start) {
      98    19371873 :                         p = &(*p)->rb_left;
      99   295066866 :                 } else if (em->start >= extent_map_end(entry)) {
     100   295066491 :                         p = &(*p)->rb_right;
     101   295066491 :                         leftmost = false;
     102             :                 } else {
     103             :                         return -EEXIST;
     104             :                 }
     105             :         }
     106             : 
     107             :         orig_parent = parent;
     108    33809017 :         while (parent && em->start >= extent_map_end(entry)) {
     109    13493632 :                 parent = rb_next(parent);
     110    13493632 :                 entry = rb_entry(parent, struct extent_map, rb_node);
     111             :         }
     112    20315385 :         if (parent)
     113     8510369 :                 if (end > entry->start && em->start < extent_map_end(entry))
     114             :                         return -EEXIST;
     115             : 
     116             :         parent = orig_parent;
     117             :         entry = rb_entry(parent, struct extent_map, rb_node);
     118    25726399 :         while (parent && em->start < entry->start) {
     119     5422945 :                 parent = rb_prev(parent);
     120     5422945 :                 entry = rb_entry(parent, struct extent_map, rb_node);
     121             :         }
     122    20303454 :         if (parent)
     123    18860234 :                 if (end > entry->start && em->start < extent_map_end(entry))
     124             :                         return -EEXIST;
     125             : 
     126    20303454 :         rb_link_node(&em->rb_node, orig_parent, p);
     127    20303454 :         rb_insert_color_cached(&em->rb_node, root, leftmost);
     128    20303454 :         return 0;
     129             : }
     130             : 
     131             : /*
     132             :  * Search through the tree for an extent_map with a given offset.  If it can't
     133             :  * be found, try to find some neighboring extents
     134             :  */
     135   175680631 : static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
     136             :                                      struct rb_node **prev_or_next_ret)
     137             : {
     138   175680631 :         struct rb_node *n = root->rb_node;
     139   175680631 :         struct rb_node *prev = NULL;
     140   175680631 :         struct rb_node *orig_prev = NULL;
     141   175680631 :         struct extent_map *entry;
     142   175680631 :         struct extent_map *prev_entry = NULL;
     143             : 
     144   175680631 :         ASSERT(prev_or_next_ret);
     145             : 
     146  1298707302 :         while (n) {
     147  1269420959 :                 entry = rb_entry(n, struct extent_map, rb_node);
     148  1269420959 :                 prev = n;
     149  1269420959 :                 prev_entry = entry;
     150             : 
     151  1269420959 :                 if (offset < entry->start)
     152   146564756 :                         n = n->rb_left;
     153  1122856203 :                 else if (offset >= extent_map_end(entry))
     154   976461915 :                         n = n->rb_right;
     155             :                 else
     156   146394288 :                         return n;
     157             :         }
     158             : 
     159             :         orig_prev = prev;
     160    63506208 :         while (prev && offset >= extent_map_end(prev_entry)) {
     161    14287597 :                 prev = rb_next(prev);
     162    14287597 :                 prev_entry = rb_entry(prev, struct extent_map, rb_node);
     163             :         }
     164             : 
     165             :         /*
     166             :          * Previous extent map found, return as in this case the caller does not
     167             :          * care about the next one.
     168             :          */
     169    29284797 :         if (prev) {
     170     5646190 :                 *prev_or_next_ret = prev;
     171     5646190 :                 return NULL;
     172             :         }
     173             : 
     174             :         prev = orig_prev;
     175             :         prev_entry = rb_entry(prev, struct extent_map, rb_node);
     176    23637913 :         while (prev && offset < prev_entry->start) {
     177           0 :                 prev = rb_prev(prev);
     178           0 :                 prev_entry = rb_entry(prev, struct extent_map, rb_node);
     179             :         }
     180    23637913 :         *prev_or_next_ret = prev;
     181             : 
     182    23637913 :         return NULL;
     183             : }
     184             : 
     185             : /* Check to see if two extent_map structs are adjacent and safe to merge. */
     186    18931280 : static int mergable_maps(struct extent_map *prev, struct extent_map *next)
     187             : {
     188    37862560 :         if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
     189             :                 return 0;
     190             : 
     191             :         /*
     192             :          * don't merge compressed extents, we need to know their
     193             :          * actual size
     194             :          */
     195    37136442 :         if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
     196             :                 return 0;
     197             : 
     198    36554090 :         if (test_bit(EXTENT_FLAG_LOGGING, &prev->flags) ||
     199    18277045 :             test_bit(EXTENT_FLAG_LOGGING, &next->flags))
     200             :                 return 0;
     201             : 
     202             :         /*
     203             :          * We don't want to merge stuff that hasn't been written to the log yet
     204             :          * since it may not reflect exactly what is on disk, and that would be
     205             :          * bad.
     206             :          */
     207    18166991 :         if (!list_empty(&prev->list) || !list_empty(&next->list))
     208             :                 return 0;
     209             : 
     210     9339814 :         ASSERT(next->block_start != EXTENT_MAP_DELALLOC &&
     211             :                prev->block_start != EXTENT_MAP_DELALLOC);
     212             : 
     213     9339814 :         if (prev->map_lookup || next->map_lookup)
     214       23218 :                 ASSERT(test_bit(EXTENT_FLAG_FS_MAPPING, &prev->flags) &&
     215             :                        test_bit(EXTENT_FLAG_FS_MAPPING, &next->flags));
     216             : 
     217     9339814 :         if (extent_map_end(prev) == next->start &&
     218     6985629 :             prev->flags == next->flags &&
     219     3808168 :             prev->map_lookup == next->map_lookup &&
     220     2814345 :             ((next->block_start == EXTENT_MAP_HOLE &&
     221     3786797 :               prev->block_start == EXTENT_MAP_HOLE) ||
     222           0 :              (next->block_start == EXTENT_MAP_INLINE &&
     223     1268501 :               prev->block_start == EXTENT_MAP_INLINE) ||
     224      972451 :              (next->block_start < EXTENT_MAP_LAST_BYTE - 1 &&
     225             :               next->block_start == extent_map_block_end(prev)))) {
     226     2578972 :                 return 1;
     227             :         }
     228             :         return 0;
     229             : }
     230             : 
     231    15613102 : static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
     232             : {
     233    15613102 :         struct extent_map *merge = NULL;
     234    15613102 :         struct rb_node *rb;
     235             : 
     236             :         /*
     237             :          * We can't modify an extent map that is in the tree and that is being
     238             :          * used by another task, as it can cause that other task to see it in
      239             :          * an inconsistent state during the merging. We always have 1 reference for
     240             :          * the tree and 1 for this task (which is unpinning the extent map or
     241             :          * clearing the logging flag), so anything > 2 means it's being used by
     242             :          * other tasks too.
     243             :          */
     244    15613102 :         if (refcount_read(&em->refs) > 2)
     245             :                 return;
     246             : 
     247    15606187 :         if (em->start != 0) {
     248    14262567 :                 rb = rb_prev(&em->rb_node);
     249    14262489 :                 if (rb)
     250    14196217 :                         merge = rb_entry(rb, struct extent_map, rb_node);
     251    14262489 :                 if (rb && mergable_maps(merge, em)) {
     252     2527827 :                         em->start = merge->start;
     253     2527827 :                         em->orig_start = merge->orig_start;
     254     2527827 :                         em->len += merge->len;
     255     2527827 :                         em->block_len += merge->block_len;
     256     2527827 :                         em->block_start = merge->block_start;
     257     2527827 :                         em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start;
     258     2527827 :                         em->mod_start = merge->mod_start;
     259     2527827 :                         em->generation = max(em->generation, merge->generation);
     260     2527827 :                         set_bit(EXTENT_FLAG_MERGED, &em->flags);
     261             : 
     262     2527828 :                         rb_erase_cached(&merge->rb_node, &tree->map);
     263     2527827 :                         RB_CLEAR_NODE(&merge->rb_node);
     264     2527827 :                         free_extent_map(merge);
     265             :                 }
     266             :         }
     267             : 
     268    15604914 :         rb = rb_next(&em->rb_node);
     269    15606007 :         if (rb)
     270     4735468 :                 merge = rb_entry(rb, struct extent_map, rb_node);
     271    15606007 :         if (rb && mergable_maps(em, merge)) {
     272       51144 :                 em->len += merge->len;
     273       51144 :                 em->block_len += merge->block_len;
     274       51144 :                 rb_erase_cached(&merge->rb_node, &tree->map);
     275       51144 :                 RB_CLEAR_NODE(&merge->rb_node);
     276       51144 :                 em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
     277       51144 :                 em->generation = max(em->generation, merge->generation);
     278       51144 :                 set_bit(EXTENT_FLAG_MERGED, &em->flags);
     279       51144 :                 free_extent_map(merge);
     280             :         }
     281             : }
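
   [Editorial note] To make the merge conditions concrete: mergable_maps() only lets two maps collapse when neither is pinned, compressed or being logged, both are off the modified list, their flags and map_lookup match, and they touch end-to-start (with contiguous block addresses for real extents, or both being holes/inline). A hedged sketch with hypothetical names, assuming an otherwise empty tree and the write-lock requirement asserted by add_extent_mapping(); with modified == 0, setup_extent_mapping() calls try_merge_map() and two adjacent hole mappings merge on insertion:

        static void example_merge_adjacent_holes(struct extent_map_tree *tree)
        {
                struct extent_map *a = alloc_extent_map();
                struct extent_map *b = alloc_extent_map();

                if (!a || !b)
                        goto out;

                /* Two unpinned, uncompressed holes: [0, 4K) and [4K, 8K). */
                a->start = 0;
                a->len = SZ_4K;
                a->block_start = EXTENT_MAP_HOLE;
                b->start = SZ_4K;
                b->len = SZ_4K;
                b->block_start = EXTENT_MAP_HOLE;

                write_lock(&tree->lock);        /* add_extent_mapping() asserts the write lock */
                add_extent_mapping(tree, a, 0);
                add_extent_mapping(tree, b, 0); /* try_merge_map() absorbs @a into @b */
                write_unlock(&tree->lock);

                /* @b now spans [0, 8K); the tree keeps its own reference to it. */
        out:
                free_extent_map(a);             /* NULL-safe; drops our references */
                free_extent_map(b);
        }
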
     282             : 
     283             : /*
     284             :  * Unpin an extent from the cache.
     285             :  *
     286             :  * @tree:       tree to unpin the extent in
     287             :  * @start:      logical offset in the file
     288             :  * @len:        length of the extent
     289             :  * @gen:        generation that this extent has been modified in
     290             :  *
     291             :  * Called after an extent has been written to disk properly.  Set the generation
     292             :  * to the generation that actually added the file item to the inode so we know
     293             :  * we need to sync this extent when we call fsync().
     294             :  */
     295     3503880 : int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
     296             :                        u64 gen)
     297             : {
     298     3503880 :         int ret = 0;
     299     3503880 :         struct extent_map *em;
     300     3503880 :         bool prealloc = false;
     301             : 
     302     3503880 :         write_lock(&tree->lock);
     303     3503881 :         em = lookup_extent_mapping(tree, start, len);
     304             : 
     305     7007822 :         WARN_ON(!em || em->start != start);
     306             : 
     307     3503913 :         if (!em)
     308           0 :                 goto out;
     309             : 
     310     3503913 :         em->generation = gen;
     311     3503913 :         clear_bit(EXTENT_FLAG_PINNED, &em->flags);
     312     3503917 :         em->mod_start = em->start;
     313     3503917 :         em->mod_len = em->len;
     314             : 
     315     7007834 :         if (test_bit(EXTENT_FLAG_FILLING, &em->flags)) {
     316      291285 :                 prealloc = true;
     317      291285 :                 clear_bit(EXTENT_FLAG_FILLING, &em->flags);
     318             :         }
     319             : 
     320     3503917 :         try_merge_map(tree, em);
     321             : 
     322     3503796 :         if (prealloc) {
     323      291285 :                 em->mod_start = em->start;
     324      291285 :                 em->mod_len = em->len;
     325             :         }
     326             : 
     327     3503796 :         free_extent_map(em);
     328     3503900 : out:
     329     3503900 :         write_unlock(&tree->lock);
     330     3503831 :         return ret;
     331             : 
     332             : }
     333             : 
     334      325187 : void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
     335             : {
     336      325187 :         lockdep_assert_held_write(&tree->lock);
     337             : 
     338      325187 :         clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
     339      325219 :         if (extent_map_in_tree(em))
     340      325218 :                 try_merge_map(tree, em);
     341      325205 : }
     342             : 
     343    26917182 : static inline void setup_extent_mapping(struct extent_map_tree *tree,
     344             :                                         struct extent_map *em,
     345             :                                         int modified)
     346             : {
     347    26917182 :         refcount_inc(&em->refs);
     348    26922593 :         em->mod_start = em->start;
     349    26922593 :         em->mod_len = em->len;
     350             : 
     351    26922593 :         if (modified)
     352    15138116 :                 list_move(&em->list, &tree->modified_extents);
     353             :         else
     354    11784477 :                 try_merge_map(tree, em);
     355    26920802 : }
     356             : 
     357       26410 : static void extent_map_device_set_bits(struct extent_map *em, unsigned bits)
     358             : {
     359       26410 :         struct map_lookup *map = em->map_lookup;
     360       26410 :         u64 stripe_size = em->orig_block_len;
     361       26410 :         int i;
     362             : 
     363       60186 :         for (i = 0; i < map->num_stripes; i++) {
     364       33776 :                 struct btrfs_io_stripe *stripe = &map->stripes[i];
     365       33776 :                 struct btrfs_device *device = stripe->dev;
     366             : 
     367       33776 :                 set_extent_bit(&device->alloc_state, stripe->physical,
     368       33776 :                                stripe->physical + stripe_size - 1,
     369             :                                bits | EXTENT_NOWAIT, NULL);
     370             :         }
     371       26410 : }
     372             : 
     373       52785 : static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
     374             : {
     375       52785 :         struct map_lookup *map = em->map_lookup;
     376       52785 :         u64 stripe_size = em->orig_block_len;
     377       52785 :         int i;
     378             : 
     379      120301 :         for (i = 0; i < map->num_stripes; i++) {
     380       67516 :                 struct btrfs_io_stripe *stripe = &map->stripes[i];
     381       67516 :                 struct btrfs_device *device = stripe->dev;
     382             : 
     383       67516 :                 __clear_extent_bit(&device->alloc_state, stripe->physical,
     384       67516 :                                    stripe->physical + stripe_size - 1,
     385             :                                    bits | EXTENT_NOWAIT,
     386             :                                    NULL, NULL);
     387             :         }
     388       52785 : }
     389             : 
     390             : /*
     391             :  * Add new extent map to the extent tree
     392             :  *
     393             :  * @tree:       tree to insert new map in
     394             :  * @em:         map to insert
     395             :  * @modified:   indicate whether the given @em should be added to the
     396             :  *              modified list, which indicates the extent needs to be logged
     397             :  *
     398             :  * Insert @em into @tree or perform a simple forward/backward merge with
     399             :  * existing mappings.  The extent_map struct passed in will be inserted
     400             :  * into the tree directly, with an additional reference taken, or a
     401             :  * reference dropped if the merge attempt was successful.
     402             :  */
     403    20313648 : int add_extent_mapping(struct extent_map_tree *tree,
     404             :                        struct extent_map *em, int modified)
     405             : {
     406    20313648 :         int ret = 0;
     407             : 
     408    20313648 :         lockdep_assert_held_write(&tree->lock);
     409             : 
     410    20313648 :         ret = tree_insert(&tree->map, em);
     411    20312849 :         if (ret)
     412       11767 :                 goto out;
     413             : 
     414    20301082 :         setup_extent_mapping(tree, em, modified);
     415    40601580 :         if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) {
     416       26410 :                 extent_map_device_set_bits(em, CHUNK_ALLOCATED);
     417       26410 :                 extent_map_device_clear_bits(em, CHUNK_TRIMMED);
     418             :         }
     419    20274380 : out:
     420    20312557 :         return ret;
     421             : }
     422             : 
     423             : static struct extent_map *
     424   175647249 : __lookup_extent_mapping(struct extent_map_tree *tree,
     425             :                         u64 start, u64 len, int strict)
     426             : {
     427   175647249 :         struct extent_map *em;
     428   175647249 :         struct rb_node *rb_node;
     429   175647249 :         struct rb_node *prev_or_next = NULL;
     430   175647249 :         u64 end = range_end(start, len);
     431             : 
     432   175647249 :         rb_node = __tree_search(&tree->map.rb_root, start, &prev_or_next);
     433   175663353 :         if (!rb_node) {
     434    29280647 :                 if (prev_or_next)
     435             :                         rb_node = prev_or_next;
     436             :                 else
     437             :                         return NULL;
     438             :         }
     439             : 
     440   164779084 :         em = rb_entry(rb_node, struct extent_map, rb_node);
     441             : 
     442   320542905 :         if (strict && !(end > em->start && start < extent_map_end(em)))
     443             :                 return NULL;
     444             : 
     445   146602856 :         refcount_inc(&em->refs);
     446   146602856 :         return em;
     447             : }
     448             : 
     449             : /*
     450             :  * Lookup extent_map that intersects @start + @len range.
     451             :  *
     452             :  * @tree:       tree to lookup in
     453             :  * @start:      byte offset to start the search
     454             :  * @len:        length of the lookup range
     455             :  *
     456             :  * Find and return the first extent_map struct in @tree that intersects the
      457             :  * [start, start + len) range.  There may be additional objects in the tree that
     458             :  * intersect, so check the object returned carefully to make sure that no
     459             :  * additional lookups are needed.
     460             :  */
     461   139382063 : struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
     462             :                                          u64 start, u64 len)
     463             : {
     464   142885944 :         return __lookup_extent_mapping(tree, start, len, 1);
     465             : }
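
   [Editorial note] The lookup takes a reference on the returned map (the refcount_inc() in __lookup_extent_mapping() above), so every successful call must be balanced with free_extent_map(). A hedged sketch of a typical read-side pattern; example_block_start is hypothetical, and the read lock reflects how lookups are commonly done outside this file:

        static u64 example_block_start(struct extent_map_tree *tree, u64 file_pos)
        {
                struct extent_map *em;
                u64 block_start = (u64)-1;

                read_lock(&tree->lock);
                em = lookup_extent_mapping(tree, file_pos, 1);
                read_unlock(&tree->lock);

                if (!em)
                        return block_start;     /* nothing intersects file_pos */

                /* May be a real disk address or EXTENT_MAP_HOLE / EXTENT_MAP_INLINE. */
                block_start = em->block_start;

                free_extent_map(em);            /* balance the lookup's reference */
                return block_start;
        }
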
     466             : 
     467             : /*
     468             :  * Find a nearby extent map intersecting @start + @len (not an exact search).
     469             :  *
     470             :  * @tree:       tree to lookup in
     471             :  * @start:      byte offset to start the search
     472             :  * @len:        length of the lookup range
     473             :  *
     474             :  * Find and return the first extent_map struct in @tree that intersects the
      475             :  * [start, start + len) range.
     476             :  *
     477             :  * If one can't be found, any nearby extent may be returned
     478             :  */
     479     4167501 : struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
     480             :                                          u64 start, u64 len)
     481             : {
     482     4167501 :         return __lookup_extent_mapping(tree, start, len, 0);
     483             : }
     484             : 
     485             : /*
     486             :  * Remove an extent_map from the extent tree.
     487             :  *
     488             :  * @tree:       extent tree to remove from
     489             :  * @em:         extent map being removed
     490             :  *
     491             :  * Remove @em from @tree.  No reference counts are dropped, and no checks
     492             :  * are done to see if the range is in use.
     493             :  */
     494    17717513 : void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
     495             : {
     496    17717513 :         lockdep_assert_held_write(&tree->lock);
     497             : 
     498    17717513 :         WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
     499    17717513 :         rb_erase_cached(&em->rb_node, &tree->map);
     500    17718143 :         if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
     501    17718319 :                 list_del_init(&em->list);
     502    35434518 :         if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
     503       26375 :                 extent_map_device_clear_bits(em, CHUNK_ALLOCATED);
     504    17717259 :         RB_CLEAR_NODE(&em->rb_node);
     505    17717259 : }
     506             : 
     507     6618183 : static void replace_extent_mapping(struct extent_map_tree *tree,
     508             :                                    struct extent_map *cur,
     509             :                                    struct extent_map *new,
     510             :                                    int modified)
     511             : {
     512     6618183 :         lockdep_assert_held_write(&tree->lock);
     513             : 
     514     6618183 :         WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags));
     515     6618183 :         ASSERT(extent_map_in_tree(cur));
     516     6618183 :         if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags))
     517     6618181 :                 list_del_init(&cur->list);
     518     6618136 :         rb_replace_node_cached(&cur->rb_node, &new->rb_node, &tree->map);
     519     6618141 :         RB_CLEAR_NODE(&cur->rb_node);
     520             : 
     521     6618141 :         setup_extent_mapping(tree, new, modified);
     522     6618163 : }
     523             : 
     524             : static struct extent_map *next_extent_map(const struct extent_map *em)
     525             : {
     526     6404706 :         struct rb_node *next;
     527             : 
     528     6404706 :         next = rb_next(&em->rb_node);
     529     6404712 :         if (!next)
     530           2 :                 return NULL;
     531             :         return container_of(next, struct extent_map, rb_node);
     532             : }
     533             : 
     534             : static struct extent_map *prev_extent_map(struct extent_map *em)
     535             : {
     536       11731 :         struct rb_node *prev;
     537             : 
     538       11731 :         prev = rb_prev(&em->rb_node);
     539       11731 :         if (!prev)
     540        1205 :                 return NULL;
     541             :         return container_of(prev, struct extent_map, rb_node);
     542             : }
     543             : 
     544             : /*
     545             :  * Helper for btrfs_get_extent.  Given an existing extent in the tree,
     546             :  * the existing extent is the nearest extent to map_start,
     547             :  * and an extent that you want to insert, deal with overlap and insert
     548             :  * the best fitted new extent into the tree.
     549             :  */
     550       11733 : static noinline int merge_extent_mapping(struct extent_map_tree *em_tree,
     551             :                                          struct extent_map *existing,
     552             :                                          struct extent_map *em,
     553             :                                          u64 map_start)
     554             : {
     555       11733 :         struct extent_map *prev;
     556       11733 :         struct extent_map *next;
     557       11733 :         u64 start;
     558       11733 :         u64 end;
     559       11733 :         u64 start_diff;
     560             : 
     561       23466 :         BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
     562             : 
     563       11733 :         if (existing->start > map_start) {
     564       11731 :                 next = existing;
     565       11731 :                 prev = prev_extent_map(next);
     566             :         } else {
     567           2 :                 prev = existing;
     568           2 :                 next = next_extent_map(prev);
     569             :         }
     570             : 
     571       11733 :         start = prev ? extent_map_end(prev) : em->start;
     572       11733 :         start = max_t(u64, start, em->start);
     573       11733 :         end = next ? next->start : extent_map_end(em);
     574       11733 :         end = min_t(u64, end, extent_map_end(em));
     575       11733 :         start_diff = start - em->start;
     576       11733 :         em->start = start;
     577       11733 :         em->len = end - start;
     578       12333 :         if (em->block_start < EXTENT_MAP_LAST_BYTE &&
     579         600 :             !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
     580         600 :                 em->block_start += start_diff;
     581         600 :                 em->block_len = em->len;
     582             :         }
     583       11733 :         return add_extent_mapping(em_tree, em, 0);
     584             : }
     585             : 
     586             : /*
     587             :  * Add extent mapping into em_tree.
     588             :  *
     589             :  * @fs_info:  the filesystem
     590             :  * @em_tree:  extent tree into which we want to insert the extent mapping
     591             :  * @em_in:    extent we are inserting
     592             :  * @start:    start of the logical range btrfs_get_extent() is requesting
     593             :  * @len:      length of the logical range btrfs_get_extent() is requesting
     594             :  *
     595             :  * Note that @em_in's range may be different from [start, start+len),
      596             :  * but they must overlap.
     597             :  *
     598             :  * Insert @em_in into @em_tree. In case there is an overlapping range, handle
     599             :  * the -EEXIST by either:
     600             :  * a) Returning the existing extent in @em_in if @start is within the
     601             :  *    existing em.
     602             :  * b) Merge the existing extent with @em_in passed in.
     603             :  *
     604             :  * Return 0 on success, otherwise -EEXIST.
     605             :  *
     606             :  */
     607     8054921 : int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
     608             :                              struct extent_map_tree *em_tree,
     609             :                              struct extent_map **em_in, u64 start, u64 len)
     610             : {
     611     8054921 :         int ret;
     612     8054921 :         struct extent_map *em = *em_in;
     613             : 
     614             :         /*
     615             :          * Tree-checker should have rejected any inline extent with non-zero
     616             :          * file offset. Here just do a sanity check.
     617             :          */
     618     8054921 :         if (em->block_start == EXTENT_MAP_INLINE)
     619             :                 ASSERT(em->start == 0);
     620             : 
     621     8054921 :         ret = add_extent_mapping(em_tree, em, 0);
     622             :         /* it is possible that someone inserted the extent into the tree
     623             :          * while we had the lock dropped.  It is also possible that
     624             :          * an overlapping map exists in the tree
     625             :          */
     626     8053404 :         if (ret == -EEXIST) {
     627       11767 :                 struct extent_map *existing;
     628             : 
     629       11767 :                 ret = 0;
     630             : 
     631       11767 :                 existing = search_extent_mapping(em_tree, start, len);
     632             : 
     633       11767 :                 trace_btrfs_handle_em_exist(fs_info, existing, em, start, len);
     634             : 
     635             :                 /*
     636             :                  * existing will always be non-NULL, since there must be
     637             :                  * extent causing the -EEXIST.
     638             :                  */
     639       11803 :                 if (start >= existing->start &&
     640             :                     start < extent_map_end(existing)) {
     641          34 :                         free_extent_map(em);
     642          34 :                         *em_in = existing;
     643          34 :                         ret = 0;
     644             :                 } else {
     645       11733 :                         u64 orig_start = em->start;
     646       11733 :                         u64 orig_len = em->len;
     647             : 
     648             :                         /*
     649             :                          * The existing extent map is the one nearest to
     650             :                          * the [start, start + len) range which overlaps
     651             :                          */
     652       11733 :                         ret = merge_extent_mapping(em_tree, existing,
     653             :                                                    em, start);
     654       11733 :                         if (ret) {
     655           0 :                                 free_extent_map(em);
     656           0 :                                 *em_in = NULL;
     657           0 :                                 WARN_ONCE(ret,
     658             : "unexpected error %d: merge existing(start %llu len %llu) with em(start %llu len %llu)\n",
     659             :                                           ret, existing->start, existing->len,
     660             :                                           orig_start, orig_len);
     661             :                         }
     662       11733 :                         free_extent_map(existing);
     663             :                 }
     664             :         }
     665             : 
     666     8053404 :         ASSERT(ret == 0 || ret == -EEXIST);
     667     8053404 :         return ret;
     668             : }
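
   [Editorial note] A hypothetical walkthrough of the two -EEXIST cases described in the comment above btrfs_add_extent_mapping(); all offsets are made up for illustration:

        /*
         * Requested range: [64K, 68K), em_in covers [60K, 72K).
         *
         * Case a) An existing map [56K, 66K) is found: 64K falls inside it, so
         *         em_in's map is freed and the existing map is returned via *em_in.
         *
         * Case b) The existing map is [68K, 72K) instead (start not covered):
         *         merge_extent_mapping() trims em_in to the free gap next to it,
         *         [60K, 68K) assuming no other neighbouring map, and inserts that.
         */
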
     669             : 
     670             : /*
     671             :  * Drop all extent maps from a tree in the fastest possible way, rescheduling
     672             :  * if needed. This avoids searching the tree, from the root down to the first
     673             :  * extent map, before each deletion.
     674             :  */
     675     7808639 : static void drop_all_extent_maps_fast(struct extent_map_tree *tree)
     676             : {
     677     7808639 :         write_lock(&tree->lock);
     678    16687061 :         while (!RB_EMPTY_ROOT(&tree->map.rb_root)) {
     679     8878202 :                 struct extent_map *em;
     680     8878202 :                 struct rb_node *node;
     681             : 
     682     8878202 :                 node = rb_first_cached(&tree->map);
     683     8878202 :                 em = rb_entry(node, struct extent_map, rb_node);
     684     8878202 :                 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
     685     8878391 :                 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
     686     8878428 :                 remove_extent_mapping(tree, em);
     687     8878311 :                 free_extent_map(em);
     688     8878258 :                 cond_resched_rwlock_write(&tree->lock);
     689             :         }
     690     7809198 :         write_unlock(&tree->lock);
     691     7809030 : }
     692             : 
     693             : /*
     694             :  * Drop all extent maps in a given range.
     695             :  *
     696             :  * @inode:       The target inode.
     697             :  * @start:       Start offset of the range.
     698             :  * @end:         End offset of the range (inclusive value).
     699             :  * @skip_pinned: Indicate if pinned extent maps should be ignored or not.
     700             :  *
     701             :  * This drops all the extent maps that intersect the given range [@start, @end].
      702             :  * Extent maps that partially overlap the range and extend behind or beyond it
     703             :  * are split.
     704             :  * The caller should have locked an appropriate file range in the inode's io
     705             :  * tree before calling this function.
     706             :  */
     707    36412183 : void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
     708             :                                  bool skip_pinned)
     709             : {
     710    36412183 :         struct extent_map *split;
     711    36412183 :         struct extent_map *split2;
     712    36412183 :         struct extent_map *em;
     713    36412183 :         struct extent_map_tree *em_tree = &inode->extent_tree;
     714    36412183 :         u64 len = end - start + 1;
     715             : 
     716    36412183 :         WARN_ON(end < start);
     717    36412183 :         if (end == (u64)-1) {
     718     7999584 :                 if (start == 0 && !skip_pinned) {
     719     7808807 :                         drop_all_extent_maps_fast(em_tree);
     720     7808807 :                         return;
     721             :                 }
     722             :                 len = (u64)-1;
     723             :         } else {
     724             :                 /* Make end offset exclusive for use in the loop below. */
     725    28412599 :                 end++;
     726             :         }
     727             : 
     728             :         /*
     729             :          * It's ok if we fail to allocate the extent maps, see the comment near
     730             :          * the bottom of the loop below. We only need two spare extent maps in
     731             :          * the worst case, where the first extent map that intersects our range
     732             :          * starts before the range and the last extent map that intersects our
     733             :          * range ends after our range (and they might be the same extent map),
     734             :          * because we need to split those two extent maps at the boundaries.
     735             :          */
     736    28603376 :         split = alloc_extent_map();
     737    28602048 :         split2 = alloc_extent_map();
     738             : 
     739    28607013 :         write_lock(&em_tree->lock);
     740    28607625 :         em = lookup_extent_mapping(em_tree, start, len);
     741             : 
     742    43987902 :         while (em) {
     743             :                 /* extent_map_end() returns exclusive value (last byte + 1). */
     744    15382197 :                 const u64 em_end = extent_map_end(em);
     745    15382197 :                 struct extent_map *next_em = NULL;
     746    15382197 :                 u64 gen;
     747    15382197 :                 unsigned long flags;
     748    15382197 :                 bool modified;
     749    15382197 :                 bool compressed;
     750             : 
     751    15382197 :                 if (em_end < end) {
     752     6404704 :                         next_em = next_extent_map(em);
     753     6215128 :                         if (next_em) {
     754     6215128 :                                 if (next_em->start < end)
     755     6165563 :                                         refcount_inc(&next_em->refs);
     756             :                                 else
     757             :                                         next_em = NULL;
     758             :                         }
     759             :                 }
     760             : 
     761    16727589 :                 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
     762          87 :                         start = em_end;
     763          87 :                         if (end != (u64)-1)
     764             :                                 len = start + len - em_end;
     765          87 :                         goto next;
     766             :                 }
     767             : 
     768    15382110 :                 flags = em->flags;
     769    15382110 :                 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
     770             :                 /*
     771             :                  * In case we split the extent map, we want to preserve the
     772             :                  * EXTENT_FLAG_LOGGING flag on our extent map, but we don't want
     773             :                  * it on the new extent maps.
     774             :                  */
     775    15382940 :                 clear_bit(EXTENT_FLAG_LOGGING, &flags);
     776    15383336 :                 modified = !list_empty(&em->list);
     777             : 
     778             :                 /*
     779             :                  * The extent map does not cross our target range, so no need to
     780             :                  * split it, we can remove it directly.
     781             :                  */
     782    15383336 :                 if (em->start >= start && em_end <= end)
     783     8767982 :                         goto remove_em;
     784             : 
     785     6615354 :                 gen = em->generation;
     786     6615354 :                 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
     787             : 
     788     6615354 :                 if (em->start < start) {
     789     4815922 :                         if (!split) {
     790           0 :                                 split = split2;
     791           0 :                                 split2 = NULL;
     792           0 :                                 if (!split)
     793           0 :                                         goto remove_em;
     794             :                         }
     795     4815922 :                         split->start = em->start;
     796     4815922 :                         split->len = start - em->start;
     797             : 
     798     4815922 :                         if (em->block_start < EXTENT_MAP_LAST_BYTE) {
     799     3006974 :                                 split->orig_start = em->orig_start;
     800     3006974 :                                 split->block_start = em->block_start;
     801             : 
     802     3006974 :                                 if (compressed)
     803          82 :                                         split->block_len = em->block_len;
     804             :                                 else
     805     3006892 :                                         split->block_len = split->len;
     806     3006974 :                                 split->orig_block_len = max(split->block_len,
     807             :                                                 em->orig_block_len);
     808     3006974 :                                 split->ram_bytes = em->ram_bytes;
     809             :                         } else {
     810     1808948 :                                 split->orig_start = split->start;
     811     1808948 :                                 split->block_len = 0;
     812     1808948 :                                 split->block_start = em->block_start;
     813     1808948 :                                 split->orig_block_len = 0;
     814     1808948 :                                 split->ram_bytes = split->len;
     815             :                         }
     816             : 
     817     4815922 :                         split->generation = gen;
     818     4815922 :                         split->flags = flags;
     819     4815922 :                         split->compress_type = em->compress_type;
     820     4815922 :                         replace_extent_mapping(em_tree, em, split, modified);
     821     4815839 :                         free_extent_map(split);
     822     4815839 :                         split = split2;
     823     4815839 :                         split2 = NULL;
     824             :                 }
     825     6615334 :                 if (em_end > end) {
     826     5041567 :                         if (!split) {
     827           0 :                                 split = split2;
     828           0 :                                 split2 = NULL;
     829           0 :                                 if (!split)
     830           0 :                                         goto remove_em;
     831             :                         }
     832     5041567 :                         split->start = start + len;
     833     5041567 :                         split->len = em_end - (start + len);
     834     5041567 :                         split->block_start = em->block_start;
     835     5041567 :                         split->flags = flags;
     836     5041567 :                         split->compress_type = em->compress_type;
     837     5041567 :                         split->generation = gen;
     838             : 
     839     5041567 :                         if (em->block_start < EXTENT_MAP_LAST_BYTE) {
     840     3171213 :                                 split->orig_block_len = max(em->block_len,
     841             :                                                     em->orig_block_len);
     842             : 
     843     3171213 :                                 split->ram_bytes = em->ram_bytes;
     844     3171213 :                                 if (compressed) {
     845         166 :                                         split->block_len = em->block_len;
     846         166 :                                         split->orig_start = em->orig_start;
     847             :                                 } else {
     848     3171047 :                                         const u64 diff = start + len - em->start;
     849             : 
     850     3171047 :                                         split->block_len = split->len;
     851     3171047 :                                         split->block_start += diff;
     852     3171047 :                                         split->orig_start = em->orig_start;
     853             :                                 }
     854             :                         } else {
     855     1870354 :                                 split->ram_bytes = split->len;
     856     1870354 :                                 split->orig_start = split->start;
     857     1870354 :                                 split->block_len = 0;
     858     1870354 :                                 split->orig_block_len = 0;
     859             :                         }
     860             : 
     861     5041567 :                         if (extent_map_in_tree(em)) {
     862     1799420 :                                 replace_extent_mapping(em_tree, em, split,
     863             :                                                        modified);
     864             :                         } else {
     865     3242147 :                                 int ret;
     866             : 
     867     3242147 :                                 ret = add_extent_mapping(em_tree, split,
     868             :                                                          modified);
     869             :                                 /* Logic error, shouldn't happen. */
     870     3242156 :                                 ASSERT(ret == 0);
     871     3242156 :                                 if (WARN_ON(ret != 0) && modified)
     872           0 :                                         btrfs_set_inode_full_sync(inode);
     873             :                         }
     874     5041592 :                         free_extent_map(split);
     875     5041592 :                         split = NULL;
     876             :                 }
     877     1573767 : remove_em:
     878    15383243 :                 if (extent_map_in_tree(em)) {
     879             :                         /*
     880             :                          * If the extent map is still in the tree it means that
     881             :                          * either of the following is true:
     882             :                          *
     883             :                          * 1) It fits entirely in our range (doesn't end beyond
     884             :                          *    it or starts before it);
     885             :                          *
     886             :                          * 2) It starts before our range and/or ends after our
     887             :                          *    range, and we were not able to allocate the extent
     888             :                          *    maps for split operations, @split and @split2.
     889             :                          *
     890             :                          * If we are at case 2) then we just remove the entire
     891             :                          * extent map - this is fine since if anyone needs it to
     892             :                          * access the subranges outside our range, will just
     893             :                          * load it again from the subvolume tree's file extent
     894             :                          * item. However if the extent map was in the list of
     895             :                          * modified extents, then we must mark the inode for a
     896             :                          * full fsync, otherwise a fast fsync will miss this
     897             :                          * extent if it's new and needs to be logged.
     898             :                          */
     899     8767866 :                         if ((em->start < start || em_end > end) && modified) {
     900           0 :                                 ASSERT(!split);
     901           0 :                                 btrfs_set_inode_full_sync(inode);
     902             :                         }
     903     8767866 :                         remove_extent_mapping(em_tree, em);
     904             :                 }
     905             : 
     906             :                 /*
     907             :                  * Once for the tree reference (we replaced or removed the
     908             :                  * extent map from the tree).
     909             :                  */
     910    15380532 :                 free_extent_map(em);
     911    15379487 : next:
     912             :                 /* Once for us (for our lookup reference). */
     913    15379487 :                 free_extent_map(em);
     914             : 
     915    15380277 :                 em = next_em;
     916             :         }
     917             : 
     918    28607055 :         write_unlock(&em_tree->lock);
     919             : 
     920    28607493 :         free_extent_map(split);
     921    28607011 :         free_extent_map(split2);
     922             : }
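
   [Editorial note] A worked example of the split arithmetic in btrfs_drop_extent_map_range() above, with hypothetical offsets:

        /*
         * em covers [0, 128K)                      =>  em_end = 128K
         * drop range: start = 32K, end = 96K - 1   =>  end++ = 96K, len = 64K
         *
         * em->start < start, so the first split keeps the head:
         *     split->start = em->start = 0,  split->len = start - em->start = 32K
         *
         * em_end > end, so the second split keeps the tail:
         *     split->start = start + len = 96K,  split->len = em_end - 96K = 32K
         *
         * The original map is replaced/removed, leaving [0, 32K) and [96K, 128K).
         */
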
     923             : 
     924             : /*
     925             :  * Replace a range in the inode's extent map tree with a new extent map.
     926             :  *
     927             :  * @inode:      The target inode.
     928             :  * @new_em:     The new extent map to add to the inode's extent map tree.
     929             :  * @modified:   Indicate if the new extent map should be added to the list of
     930             :  *              modified extents (for fast fsync tracking).
     931             :  *
     932             :  * Drops all the extent maps in the inode's extent map tree that intersect the
     933             :  * range of the new extent map and adds the new extent map to the tree.
     934             :  * The caller should have locked an appropriate file range in the inode's io
     935             :  * tree before calling this function.
     936             :  */
     937     8975768 : int btrfs_replace_extent_map_range(struct btrfs_inode *inode,
     938             :                                    struct extent_map *new_em,
     939             :                                    bool modified)
     940             : {
     941     8975768 :         const u64 end = new_em->start + new_em->len - 1;
     942     8975768 :         struct extent_map_tree *tree = &inode->extent_tree;
     943     8975768 :         int ret;
     944             : 
     945     8975768 :         ASSERT(!extent_map_in_tree(new_em));
     946             : 
     947             :         /*
     948             :          * The caller has locked an appropriate file range in the inode's io
     949             :          * tree, but getting -EEXIST when adding the new extent map can still
     950             :          * happen in case there are extents that partially cover the range, and
     951             :          * this is due to two tasks operating on different parts of the extent.
     952             :          * See commit 18e83ac75bfe67 ("Btrfs: fix unexpected EEXIST from
     953             :          * btrfs_get_extent") for an example and details.
     954             :          */
     955     8975768 :         do {
     956     8975768 :                 btrfs_drop_extent_map_range(inode, new_em->start, end, false);
     957     8977854 :                 write_lock(&tree->lock);
     958     8978758 :                 ret = add_extent_mapping(tree, new_em, modified);
     959     8977942 :                 write_unlock(&tree->lock);
     960     8977785 :         } while (ret == -EEXIST);
     961             : 
     962     8977785 :         return ret;
     963             : }
     964             : 
     965             : /*
     966             :  * Split off the first pre bytes from the extent_map at [start, start + len],
     967             :  * and set the block_start for it to new_logical.
     968             :  *
     969             :  * This function is used when an ordered_extent needs to be split.
     970             :  */
     971        2947 : int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
     972             :                      u64 new_logical)
     973             : {
     974        2947 :         struct extent_map_tree *em_tree = &inode->extent_tree;
     975        2947 :         struct extent_map *em;
     976        2947 :         struct extent_map *split_pre = NULL;
     977        2947 :         struct extent_map *split_mid = NULL;
     978        2947 :         int ret = 0;
     979        2947 :         unsigned long flags;
     980             : 
     981        2947 :         ASSERT(pre != 0);
     982        2947 :         ASSERT(pre < len);
     983             : 
     984        2947 :         split_pre = alloc_extent_map();
     985        2947 :         if (!split_pre)
     986             :                 return -ENOMEM;
     987        2947 :         split_mid = alloc_extent_map();
     988        2947 :         if (!split_mid) {
     989           0 :                 ret = -ENOMEM;
     990           0 :                 goto out_free_pre;
     991             :         }
     992             : 
     993        2947 :         lock_extent(&inode->io_tree, start, start + len - 1, NULL);
     994        2947 :         write_lock(&em_tree->lock);
     995        2947 :         em = lookup_extent_mapping(em_tree, start, len);
     996        2947 :         if (!em) {
     997           0 :                 ret = -EIO;
     998           0 :                 goto out_unlock;
     999             :         }
    1000             : 
    1001        2947 :         ASSERT(em->len == len);
    1002        2947 :         ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
    1003        2947 :         ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE);
    1004        2947 :         ASSERT(test_bit(EXTENT_FLAG_PINNED, &em->flags));
    1005        2947 :         ASSERT(!test_bit(EXTENT_FLAG_LOGGING, &em->flags));
    1006        2947 :         ASSERT(!list_empty(&em->list));
    1007             : 
    1008        2947 :         flags = em->flags;
    1009        2947 :         clear_bit(EXTENT_FLAG_PINNED, &em->flags);
    1010             : 
     1011             :         /* First, replace the em with a new extent_map starting from em->start */
    1012        2947 :         split_pre->start = em->start;
    1013        2947 :         split_pre->len = pre;
    1014        2947 :         split_pre->orig_start = split_pre->start;
    1015        2947 :         split_pre->block_start = new_logical;
    1016        2947 :         split_pre->block_len = split_pre->len;
    1017        2947 :         split_pre->orig_block_len = split_pre->block_len;
    1018        2947 :         split_pre->ram_bytes = split_pre->len;
    1019        2947 :         split_pre->flags = flags;
    1020        2947 :         split_pre->compress_type = em->compress_type;
    1021        2947 :         split_pre->generation = em->generation;
    1022             : 
    1023        2947 :         replace_extent_mapping(em_tree, em, split_pre, 1);
    1024             : 
    1025             :         /*
    1026             :          * Now we only have an extent_map at:
    1027             :          *     [em->start, em->start + pre]
    1028             :          */
    1029             : 
    1030             :         /* Insert the middle extent_map. */
    1031        2947 :         split_mid->start = em->start + pre;
    1032        2947 :         split_mid->len = em->len - pre;
    1033        2947 :         split_mid->orig_start = split_mid->start;
    1034        2947 :         split_mid->block_start = em->block_start + pre;
    1035        2947 :         split_mid->block_len = split_mid->len;
    1036        2947 :         split_mid->orig_block_len = split_mid->block_len;
    1037        2947 :         split_mid->ram_bytes = split_mid->len;
    1038        2947 :         split_mid->flags = flags;
    1039        2947 :         split_mid->compress_type = em->compress_type;
    1040        2947 :         split_mid->generation = em->generation;
    1041        2947 :         add_extent_mapping(em_tree, split_mid, 1);
    1042             : 
    1043             :         /* Once for us */
    1044        2947 :         free_extent_map(em);
    1045             :         /* Once for the tree */
    1046        2947 :         free_extent_map(em);
    1047             : 
    1048        2947 : out_unlock:
    1049        2947 :         write_unlock(&em_tree->lock);
    1050        2947 :         unlock_extent(&inode->io_tree, start, start + len - 1, NULL);
    1051        2947 :         free_extent_map(split_mid);
    1052        2947 : out_free_pre:
    1053        2947 :         free_extent_map(split_pre);
    1054        2947 :         return ret;
    1055             : }
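
   [Editorial note] A worked example of the pre-split performed by split_extent_map() above, with hypothetical values:

        /*
         * Original pinned em: [start, start + len), block_start = B.
         * split_extent_map(inode, start, len, pre, N) with 0 < pre < len yields:
         *
         *     split_pre: [start, start + pre),        block_start = N
         *     split_mid: [start + pre, start + len),  block_start = B + pre
         *
         * split_pre replaces the original map in the tree and split_mid is added
         * after it; both are inserted with modified == 1, so they land on the
         * tree's modified_extents list for fsync tracking.
         */
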

Generated by: LCOV version 1.14